From 59b239f7a2c3c1a26a27b9dcc6b3429433f61c3c Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 6 Nov 2024 18:33:32 +0100 Subject: [PATCH] [HIPIFY][6.3.0][BLAS] Sync with `hipBLAS` and `rocBLAS` - Step 12 + `rocblas_(s|d|c|z)trmm_64` and `hipblas(S|D|C|Z)trmm(_v2)?_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 32 +++++++++---------- docs/tables/CUBLAS_API_supported_by_HIP.md | 16 +++++----- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 16 +++++----- docs/tables/CUBLAS_API_supported_by_ROC.md | 16 +++++----- src/CUDA2HIP_BLAS_API_functions.cpp | 24 +++++++++----- .../synthetic/libraries/cublas2hipblas_v2.cu | 28 ++++++++++++++++ .../synthetic/libraries/cublas2rocblas_v2.cu | 28 ++++++++++++++++ 7 files changed, 112 insertions(+), 48 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 320d15fb..64f19bb9 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1736,7 +1736,9 @@ sub rocSubstitutions { subst("cublasCtpsv_v2", "rocblas_ctpsv", "library"); subst("cublasCtpsv_v2_64", "rocblas_ctpsv_64", "library"); subst("cublasCtrmm", "rocblas_ctrmm", "library"); + subst("cublasCtrmm_64", "rocblas_ctrmm_64", "library"); subst("cublasCtrmm_v2", "rocblas_ctrmm", "library"); + subst("cublasCtrmm_v2_64", "rocblas_ctrmm_64", "library"); subst("cublasCtrmv", "rocblas_ctrmv", "library"); subst("cublasCtrmv_64", "rocblas_ctrmv_64", "library"); subst("cublasCtrmv_v2", "rocblas_ctrmv", "library"); @@ -1883,7 +1885,9 @@ sub rocSubstitutions { subst("cublasDtpsv_v2", "rocblas_dtpsv", "library"); subst("cublasDtpsv_v2_64", "rocblas_dtpsv_64", "library"); subst("cublasDtrmm", "rocblas_dtrmm", "library"); + subst("cublasDtrmm_64", "rocblas_dtrmm_64", "library"); subst("cublasDtrmm_v2", "rocblas_dtrmm", "library"); + subst("cublasDtrmm_v2_64", "rocblas_dtrmm_64", "library"); subst("cublasDtrmv", "rocblas_dtrmv", "library"); subst("cublasDtrmv_64", "rocblas_dtrmv_64", "library"); subst("cublasDtrmv_v2", "rocblas_dtrmv", "library"); @@ -2117,7 +2121,9 @@ sub rocSubstitutions { subst("cublasStpsv_v2", "rocblas_stpsv", "library"); subst("cublasStpsv_v2_64", "rocblas_stpsv_64", "library"); subst("cublasStrmm", "rocblas_strmm", "library"); + subst("cublasStrmm_64", "rocblas_strmm_64", "library"); subst("cublasStrmm_v2", "rocblas_strmm", "library"); + subst("cublasStrmm_v2_64", "rocblas_strmm_64", "library"); subst("cublasStrmv", "rocblas_strmv", "library"); subst("cublasStrmv_64", "rocblas_strmv_64", "library"); subst("cublasStrmv_v2", "rocblas_strmv", "library"); @@ -2294,7 +2300,9 @@ sub rocSubstitutions { subst("cublasZtpsv_v2", "rocblas_ztpsv", "library"); subst("cublasZtpsv_v2_64", "rocblas_ztpsv_64", "library"); subst("cublasZtrmm", "rocblas_ztrmm", "library"); + subst("cublasZtrmm_64", "rocblas_ztrmm_64", "library"); subst("cublasZtrmm_v2", "rocblas_ztrmm", "library"); + subst("cublasZtrmm_v2_64", "rocblas_ztrmm_64", "library"); subst("cublasZtrmv", "rocblas_ztrmv", "library"); subst("cublasZtrmv_64", "rocblas_ztrmv_64", "library"); subst("cublasZtrmv_v2", "rocblas_ztrmv", "library"); @@ -4526,7 +4534,9 @@ sub simpleSubstitutions { subst("cublasCtpsv_v2", "hipblasCtpsv_v2", "library"); subst("cublasCtpsv_v2_64", "hipblasCtpsv_v2_64", "library"); subst("cublasCtrmm", "hipblasCtrmm_v2", "library"); + subst("cublasCtrmm_64", "hipblasCtrmm_v2_64", "library"); subst("cublasCtrmm_v2", "hipblasCtrmm_v2", "library"); + subst("cublasCtrmm_v2_64", "hipblasCtrmm_v2_64", "library"); subst("cublasCtrmv", "hipblasCtrmv_v2", "library"); subst("cublasCtrmv_64", "hipblasCtrmv_v2_64", "library"); subst("cublasCtrmv_v2", "hipblasCtrmv_v2", "library"); @@ -4674,7 +4684,9 @@ sub simpleSubstitutions { subst("cublasDtpsv_v2", "hipblasDtpsv", "library"); subst("cublasDtpsv_v2_64", "hipblasDtpsv_64", "library"); subst("cublasDtrmm", "hipblasDtrmm", "library"); + subst("cublasDtrmm_64", "hipblasDtrmm_64", "library"); subst("cublasDtrmm_v2", "hipblasDtrmm", "library"); + subst("cublasDtrmm_v2_64", "hipblasDtrmm_64", "library"); subst("cublasDtrmv", "hipblasDtrmv", "library"); subst("cublasDtrmv_64", "hipblasDtrmv_64", "library"); subst("cublasDtrmv_v2", "hipblasDtrmv", "library"); @@ -4919,7 +4931,9 @@ sub simpleSubstitutions { subst("cublasStpsv_v2", "hipblasStpsv", "library"); subst("cublasStpsv_v2_64", "hipblasStpsv_64", "library"); subst("cublasStrmm", "hipblasStrmm", "library"); + subst("cublasStrmm_64", "hipblasStrmm_64", "library"); subst("cublasStrmm_v2", "hipblasStrmm", "library"); + subst("cublasStrmm_v2_64", "hipblasStrmm_64", "library"); subst("cublasStrmv", "hipblasStrmv", "library"); subst("cublasStrmv_64", "hipblasStrmv_64", "library"); subst("cublasStrmv_v2", "hipblasStrmv", "library"); @@ -5089,7 +5103,9 @@ sub simpleSubstitutions { subst("cublasZtpsv_v2", "hipblasZtpsv_v2", "library"); subst("cublasZtpsv_v2_64", "hipblasZtpsv_v2_64", "library"); subst("cublasZtrmm", "hipblasZtrmm_v2", "library"); + subst("cublasZtrmm_64", "hipblasZtrmm_v2_64", "library"); subst("cublasZtrmm_v2", "hipblasZtrmm_v2", "library"); + subst("cublasZtrmm_v2_64", "hipblasZtrmm_v2_64", "library"); subst("cublasZtrmv", "hipblasZtrmv_v2", "library"); subst("cublasZtrmv_64", "hipblasZtrmv_v2_64", "library"); subst("cublasZtrmv_v2", "hipblasZtrmv_v2", "library"); @@ -11632,8 +11648,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZtrsm_v2_64", "cublasZtrsm_64", "cublasZtrsmBatched_64", - "cublasZtrmm_v2_64", - "cublasZtrmm_64", "cublasZtpttr", "cublasZmatinvBatched", "cublasZgemm3m_64", @@ -11655,8 +11669,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasStrsm_v2_64", "cublasStrsm_64", "cublasStrsmBatched_64", - "cublasStrmm_v2_64", - "cublasStrmm_64", "cublasStpttr", "cublasSmatinvBatched", "cublasShutdown", @@ -11751,8 +11763,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasDtrsm_v2_64", "cublasDtrsm_64", "cublasDtrsmBatched_64", - "cublasDtrmm_v2_64", - "cublasDtrmm_64", "cublasDtpttr", "cublasDmatinvBatched", "cublasDgemmGroupedBatched_64", @@ -11762,8 +11772,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCtrsm_v2_64", "cublasCtrsm_64", "cublasCtrsmBatched_64", - "cublasCtrmm_v2_64", - "cublasCtrmm_64", "cublasCtpttr", "cublasCsyrkEx_64", "cublasCsyrkEx", @@ -13727,8 +13735,6 @@ sub warnRocOnlyUnsupportedFunctions { "cudnnAddTensor", "cudnnActivationStruct", "cublasZtrttp", - "cublasZtrmm_v2_64", - "cublasZtrmm_64", "cublasZtpttr", "cublasZmatinvBatched", "cublasZgetrsBatched", @@ -13744,8 +13750,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSwapEx_64", "cublasSwapEx", "cublasStrttp", - "cublasStrmm_v2_64", - "cublasStrmm_64", "cublasStpttr", "cublasSmatinvBatched", "cublasShutdown", @@ -13859,8 +13863,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasGemmBatchedEx_64", "cublasFree", "cublasDtrttp", - "cublasDtrmm_v2_64", - "cublasDtrmm_64", "cublasDtpttr", "cublasDmatinvBatched", "cublasDgetrsBatched", @@ -13872,8 +13874,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDgelsBatched", "cublasDdgmm_64", "cublasCtrttp", - "cublasCtrmm_v2_64", - "cublasCtrmm_64", "cublasCtpttr", "cublasCsyrkEx_64", "cublasCsyrkEx", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 918f7186..7561d5c8 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1523,9 +1523,9 @@ |`cublasCsyrkx`| | | | |`hipblasCsyrkx_v2`|6.0.0| | | | | |`cublasCsyrkx_64`|12.0| | | |`hipblasCsyrkx_v2_64`|6.3.0| | | |6.3.0| |`cublasCtrmm`| | | | |`hipblasCtrmm_v2`|6.0.0| | | | | -|`cublasCtrmm_64`|12.0| | | | | | | | | | +|`cublasCtrmm_64`|12.0| | | |`hipblasCtrmm_v2_64`|6.3.0| | | |6.3.0| |`cublasCtrmm_v2`| | | | |`hipblasCtrmm_v2`|6.0.0| | | | | -|`cublasCtrmm_v2_64`|12.0| | | | | | | | | | +|`cublasCtrmm_v2_64`|12.0| | | |`hipblasCtrmm_v2_64`|6.3.0| | | |6.3.0| |`cublasCtrsm`| | | | |`hipblasCtrsm_v2`|6.0.0| | | | | |`cublasCtrsm_64`|12.0| | | | | | | | | | |`cublasCtrsm_v2`| | | | |`hipblasCtrsm_v2`|6.0.0| | | | | @@ -1559,9 +1559,9 @@ |`cublasDsyrkx`| | | | |`hipblasDsyrkx`|3.5.0| | | | | |`cublasDsyrkx_64`|12.0| | | |`hipblasDsyrkx_64`|6.3.0| | | |6.3.0| |`cublasDtrmm`| | | | |`hipblasDtrmm`|3.2.0| |6.0.0| | | -|`cublasDtrmm_64`|12.0| | | | | | | | | | +|`cublasDtrmm_64`|12.0| | | |`hipblasDtrmm_64`|6.3.0| | | |6.3.0| |`cublasDtrmm_v2`| | | | |`hipblasDtrmm`|3.2.0| |6.0.0| | | -|`cublasDtrmm_v2_64`|12.0| | | | | | | | | | +|`cublasDtrmm_v2_64`|12.0| | | |`hipblasDtrmm_64`|6.3.0| | | |6.3.0| |`cublasDtrsm`| | | | |`hipblasDtrsm`|1.8.2| | | | | |`cublasDtrsm_64`|12.0| | | | | | | | | | |`cublasDtrsm_v2`| | | | |`hipblasDtrsm`|1.8.2| | | | | @@ -1611,9 +1611,9 @@ |`cublasSsyrkx`| | | | |`hipblasSsyrkx`|3.5.0| | | | | |`cublasSsyrkx_64`|12.0| | | |`hipblasSsyrkx_64`|6.3.0| | | |6.3.0| |`cublasStrmm`| | | | |`hipblasStrmm`|3.2.0| |6.0.0| | | -|`cublasStrmm_64`|12.0| | | | | | | | | | +|`cublasStrmm_64`|12.0| | | |`hipblasStrmm_64`|6.3.0| | | |6.3.0| |`cublasStrmm_v2`| | | | |`hipblasStrmm`|3.2.0| |6.0.0| | | -|`cublasStrmm_v2_64`|12.0| | | | | | | | | | +|`cublasStrmm_v2_64`|12.0| | | |`hipblasStrmm_64`|6.3.0| | | |6.3.0| |`cublasStrsm`| | | | |`hipblasStrsm`|1.8.2| | | | | |`cublasStrsm_64`|12.0| | | | | | | | | | |`cublasStrsm_v2`| | | | |`hipblasStrsm`|1.8.2| | | | | @@ -1669,9 +1669,9 @@ |`cublasZsyrkx`| | | | |`hipblasZsyrkx_v2`|6.0.0| | | | | |`cublasZsyrkx_64`|12.0| | | |`hipblasZsyrkx_v2_64`|6.3.0| | | |6.3.0| |`cublasZtrmm`| | | | |`hipblasZtrmm_v2`|6.0.0| | | | | -|`cublasZtrmm_64`|12.0| | | | | | | | | | +|`cublasZtrmm_64`|12.0| | | |`hipblasZtrmm_v2_64`|6.3.0| | | |6.3.0| |`cublasZtrmm_v2`| | | | |`hipblasZtrmm_v2`|6.0.0| | | | | -|`cublasZtrmm_v2_64`|12.0| | | | | | | | | | +|`cublasZtrmm_v2_64`|12.0| | | |`hipblasZtrmm_v2_64`|6.3.0| | | |6.3.0| |`cublasZtrsm`| | | | |`hipblasZtrsm_v2`|6.0.0| | | | | |`cublasZtrsm_64`|12.0| | | | | | | | | | |`cublasZtrsm_v2`| | | | |`hipblasZtrsm_v2`|6.0.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index e94c800e..ae05ad0e 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1523,9 +1523,9 @@ |`cublasCsyrkx`| | | | |`hipblasCsyrkx_v2`|6.0.0| | | | |`rocblas_csyrkx`|3.5.0| | | | | |`cublasCsyrkx_64`|12.0| | | |`hipblasCsyrkx_v2_64`|6.3.0| | | |6.3.0|`rocblas_csyrkx_64`|6.3.0| | | |6.3.0| |`cublasCtrmm`| | | | |`hipblasCtrmm_v2`|6.0.0| | | | |`rocblas_ctrmm`|3.5.0| |6.0.0| | | -|`cublasCtrmm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCtrmm_64`|12.0| | | |`hipblasCtrmm_v2_64`|6.3.0| | | |6.3.0|`rocblas_ctrmm_64`|6.3.0| | | |6.3.0| |`cublasCtrmm_v2`| | | | |`hipblasCtrmm_v2`|6.0.0| | | | |`rocblas_ctrmm`|3.5.0| |6.0.0| | | -|`cublasCtrmm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCtrmm_v2_64`|12.0| | | |`hipblasCtrmm_v2_64`|6.3.0| | | |6.3.0|`rocblas_ctrmm_64`|6.3.0| | | |6.3.0| |`cublasCtrsm`| | | | |`hipblasCtrsm_v2`|6.0.0| | | | |`rocblas_ctrsm`|3.5.0| | | | | |`cublasCtrsm_64`|12.0| | | | | | | | | |`rocblas_ctrsm_64`|6.2.0| | | | | |`cublasCtrsm_v2`| | | | |`hipblasCtrsm_v2`|6.0.0| | | | |`rocblas_ctrsm`|3.5.0| | | | | @@ -1559,9 +1559,9 @@ |`cublasDsyrkx`| | | | |`hipblasDsyrkx`|3.5.0| | | | |`rocblas_dsyrkx`|3.5.0| | | | | |`cublasDsyrkx_64`|12.0| | | |`hipblasDsyrkx_64`|6.3.0| | | |6.3.0|`rocblas_dsyrkx_64`|6.3.0| | | |6.3.0| |`cublasDtrmm`| | | | |`hipblasDtrmm`|3.2.0| |6.0.0| | |`rocblas_dtrmm`|3.5.0| |6.0.0| | | -|`cublasDtrmm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtrmm_64`|12.0| | | |`hipblasDtrmm_64`|6.3.0| | | |6.3.0|`rocblas_dtrmm_64`|6.3.0| | | |6.3.0| |`cublasDtrmm_v2`| | | | |`hipblasDtrmm`|3.2.0| |6.0.0| | |`rocblas_dtrmm`|3.5.0| |6.0.0| | | -|`cublasDtrmm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtrmm_v2_64`|12.0| | | |`hipblasDtrmm_64`|6.3.0| | | |6.3.0|`rocblas_dtrmm_64`|6.3.0| | | |6.3.0| |`cublasDtrsm`| | | | |`hipblasDtrsm`|1.8.2| | | | |`rocblas_dtrsm`|1.5.0| | | | | |`cublasDtrsm_64`|12.0| | | | | | | | | |`rocblas_dtrsm_64`|6.2.0| | | | | |`cublasDtrsm_v2`| | | | |`hipblasDtrsm`|1.8.2| | | | |`rocblas_dtrsm`|1.5.0| | | | | @@ -1611,9 +1611,9 @@ |`cublasSsyrkx`| | | | |`hipblasSsyrkx`|3.5.0| | | | |`rocblas_ssyrkx`|3.5.0| | | | | |`cublasSsyrkx_64`|12.0| | | |`hipblasSsyrkx_64`|6.3.0| | | |6.3.0|`rocblas_ssyrkx_64`|6.3.0| | | |6.3.0| |`cublasStrmm`| | | | |`hipblasStrmm`|3.2.0| |6.0.0| | |`rocblas_strmm`|3.5.0| |6.0.0| | | -|`cublasStrmm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasStrmm_64`|12.0| | | |`hipblasStrmm_64`|6.3.0| | | |6.3.0|`rocblas_strmm_64`|6.3.0| | | |6.3.0| |`cublasStrmm_v2`| | | | |`hipblasStrmm`|3.2.0| |6.0.0| | |`rocblas_strmm`|3.5.0| |6.0.0| | | -|`cublasStrmm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasStrmm_v2_64`|12.0| | | |`hipblasStrmm_64`|6.3.0| | | |6.3.0|`rocblas_strmm_64`|6.3.0| | | |6.3.0| |`cublasStrsm`| | | | |`hipblasStrsm`|1.8.2| | | | |`rocblas_strsm`|1.5.0| | | | | |`cublasStrsm_64`|12.0| | | | | | | | | |`rocblas_strsm_64`|6.2.0| | | | | |`cublasStrsm_v2`| | | | |`hipblasStrsm`|1.8.2| | | | |`rocblas_strsm`|1.5.0| | | | | @@ -1669,9 +1669,9 @@ |`cublasZsyrkx`| | | | |`hipblasZsyrkx_v2`|6.0.0| | | | |`rocblas_zsyrkx`|3.5.0| | | | | |`cublasZsyrkx_64`|12.0| | | |`hipblasZsyrkx_v2_64`|6.3.0| | | |6.3.0|`rocblas_zsyrkx_64`|6.3.0| | | |6.3.0| |`cublasZtrmm`| | | | |`hipblasZtrmm_v2`|6.0.0| | | | |`rocblas_ztrmm`|3.5.0| |6.0.0| | | -|`cublasZtrmm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZtrmm_64`|12.0| | | |`hipblasZtrmm_v2_64`|6.3.0| | | |6.3.0|`rocblas_ztrmm_64`|6.3.0| | | |6.3.0| |`cublasZtrmm_v2`| | | | |`hipblasZtrmm_v2`|6.0.0| | | | |`rocblas_ztrmm`|3.5.0| |6.0.0| | | -|`cublasZtrmm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZtrmm_v2_64`|12.0| | | |`hipblasZtrmm_v2_64`|6.3.0| | | |6.3.0|`rocblas_ztrmm_64`|6.3.0| | | |6.3.0| |`cublasZtrsm`| | | | |`hipblasZtrsm_v2`|6.0.0| | | | |`rocblas_ztrsm`|3.5.0| | | | | |`cublasZtrsm_64`|12.0| | | | | | | | | |`rocblas_ztrsm_64`|6.2.0| | | | | |`cublasZtrsm_v2`| | | | |`hipblasZtrsm_v2`|6.0.0| | | | |`rocblas_ztrsm`|3.5.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index fdac1cea..faf0f611 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1523,9 +1523,9 @@ |`cublasCsyrkx`| | | | |`rocblas_csyrkx`|3.5.0| | | | | |`cublasCsyrkx_64`|12.0| | | |`rocblas_csyrkx_64`|6.3.0| | | |6.3.0| |`cublasCtrmm`| | | | |`rocblas_ctrmm`|3.5.0| |6.0.0| | | -|`cublasCtrmm_64`|12.0| | | | | | | | | | +|`cublasCtrmm_64`|12.0| | | |`rocblas_ctrmm_64`|6.3.0| | | |6.3.0| |`cublasCtrmm_v2`| | | | |`rocblas_ctrmm`|3.5.0| |6.0.0| | | -|`cublasCtrmm_v2_64`|12.0| | | | | | | | | | +|`cublasCtrmm_v2_64`|12.0| | | |`rocblas_ctrmm_64`|6.3.0| | | |6.3.0| |`cublasCtrsm`| | | | |`rocblas_ctrsm`|3.5.0| | | | | |`cublasCtrsm_64`|12.0| | | |`rocblas_ctrsm_64`|6.2.0| | | | | |`cublasCtrsm_v2`| | | | |`rocblas_ctrsm`|3.5.0| | | | | @@ -1559,9 +1559,9 @@ |`cublasDsyrkx`| | | | |`rocblas_dsyrkx`|3.5.0| | | | | |`cublasDsyrkx_64`|12.0| | | |`rocblas_dsyrkx_64`|6.3.0| | | |6.3.0| |`cublasDtrmm`| | | | |`rocblas_dtrmm`|3.5.0| |6.0.0| | | -|`cublasDtrmm_64`|12.0| | | | | | | | | | +|`cublasDtrmm_64`|12.0| | | |`rocblas_dtrmm_64`|6.3.0| | | |6.3.0| |`cublasDtrmm_v2`| | | | |`rocblas_dtrmm`|3.5.0| |6.0.0| | | -|`cublasDtrmm_v2_64`|12.0| | | | | | | | | | +|`cublasDtrmm_v2_64`|12.0| | | |`rocblas_dtrmm_64`|6.3.0| | | |6.3.0| |`cublasDtrsm`| | | | |`rocblas_dtrsm`|1.5.0| | | | | |`cublasDtrsm_64`|12.0| | | |`rocblas_dtrsm_64`|6.2.0| | | | | |`cublasDtrsm_v2`| | | | |`rocblas_dtrsm`|1.5.0| | | | | @@ -1611,9 +1611,9 @@ |`cublasSsyrkx`| | | | |`rocblas_ssyrkx`|3.5.0| | | | | |`cublasSsyrkx_64`|12.0| | | |`rocblas_ssyrkx_64`|6.3.0| | | |6.3.0| |`cublasStrmm`| | | | |`rocblas_strmm`|3.5.0| |6.0.0| | | -|`cublasStrmm_64`|12.0| | | | | | | | | | +|`cublasStrmm_64`|12.0| | | |`rocblas_strmm_64`|6.3.0| | | |6.3.0| |`cublasStrmm_v2`| | | | |`rocblas_strmm`|3.5.0| |6.0.0| | | -|`cublasStrmm_v2_64`|12.0| | | | | | | | | | +|`cublasStrmm_v2_64`|12.0| | | |`rocblas_strmm_64`|6.3.0| | | |6.3.0| |`cublasStrsm`| | | | |`rocblas_strsm`|1.5.0| | | | | |`cublasStrsm_64`|12.0| | | |`rocblas_strsm_64`|6.2.0| | | | | |`cublasStrsm_v2`| | | | |`rocblas_strsm`|1.5.0| | | | | @@ -1669,9 +1669,9 @@ |`cublasZsyrkx`| | | | |`rocblas_zsyrkx`|3.5.0| | | | | |`cublasZsyrkx_64`|12.0| | | |`rocblas_zsyrkx_64`|6.3.0| | | |6.3.0| |`cublasZtrmm`| | | | |`rocblas_ztrmm`|3.5.0| |6.0.0| | | -|`cublasZtrmm_64`|12.0| | | | | | | | | | +|`cublasZtrmm_64`|12.0| | | |`rocblas_ztrmm_64`|6.3.0| | | |6.3.0| |`cublasZtrmm_v2`| | | | |`rocblas_ztrmm`|3.5.0| |6.0.0| | | -|`cublasZtrmm_v2_64`|12.0| | | | | | | | | | +|`cublasZtrmm_v2_64`|12.0| | | |`rocblas_ztrmm_64`|6.3.0| | | |6.3.0| |`cublasZtrsm`| | | | |`rocblas_ztrsm`|3.5.0| | | | | |`cublasZtrsm_64`|12.0| | | |`rocblas_ztrsm_64`|6.2.0| | | | | |`cublasZtrsm_v2`| | | | |`rocblas_ztrsm`|3.5.0| | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 3d12911d..436db2a6 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -551,13 +551,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TRMM {"cublasStrmm", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasStrmm_64", {"hipblasStrmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasStrmm_64", {"hipblasStrmm_64", "rocblas_strmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasDtrmm", {"hipblasDtrmm", "rocblas_dtrmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDtrmm_64", {"hipblasDtrmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDtrmm_64", {"hipblasDtrmm_64", "rocblas_dtrmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCtrmm", {"hipblasCtrmm_v2", "rocblas_ctrmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCtrmm_64", {"hipblasCtrmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCtrmm_64", {"hipblasCtrmm_v2_64", "rocblas_ctrmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZtrmm", {"hipblasZtrmm_v2", "rocblas_ztrmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZtrmm_64", {"hipblasZtrmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZtrmm_64", {"hipblasZtrmm_v2_64", "rocblas_ztrmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // ------------------------ CUBLAS BLAS - like extension (cublas_api.h) // GEAM @@ -918,13 +918,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TRMM {"cublasStrmm_v2", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasStrmm_v2_64", {"hipblasStrmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasStrmm_v2_64", {"hipblasStrmm_64", "rocblas_strmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasDtrmm_v2", {"hipblasDtrmm", "rocblas_dtrmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasDtrmm_v2_64", {"hipblasDtrmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDtrmm_v2_64", {"hipblasDtrmm_64", "rocblas_dtrmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCtrmm_v2", {"hipblasCtrmm_v2", "rocblas_ctrmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasCtrmm_v2_64", {"hipblasCtrmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCtrmm_v2_64", {"hipblasCtrmm_v2_64", "rocblas_ctrmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZtrmm_v2", {"hipblasZtrmm_v2", "rocblas_ztrmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasZtrmm_v2_64", {"hipblasZtrmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZtrmm_v2_64", {"hipblasZtrmm_v2_64", "rocblas_ztrmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // NRM2 {"cublasSnrm2_v2", {"hipblasSnrm2", "rocblas_snrm2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_1}}, @@ -2066,6 +2066,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasZgeam_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasChemm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasZhemm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasStrmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDtrmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCtrmm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZtrmm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2497,6 +2501,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_zgeam_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_chemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_zhemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_strmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_dtrmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_ctrmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_ztrmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 7783384f..89919766 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -3084,6 +3084,34 @@ int main() { // CHECK-NEXT: blasStatus = hipblasZhemm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZhemm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZhemm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, float* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrmm_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, float* C, int64_t ldc); + // CHECK: blasStatus = hipblasStrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasStrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fC, ldc_64); + blasStatus = cublasStrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fC, ldc_64); + blasStatus = cublasStrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, double* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrmm_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, double* C, int64_t ldc); + // CHECK: blasStatus = hipblasDtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &dC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasDtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &dC, ldc_64); + blasStatus = cublasDtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &dC, ldc_64); + blasStatus = cublasDtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &dC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, cuComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrmm_v2_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipComplex* alpha, const hipComplex* A, int64_t lda, const hipComplex* B, int64_t ldb, hipComplex* C, int64_t ldc); + // CHECK: blasStatus = hipblasCtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasCtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexC, ldc_64); + blasStatus = cublasCtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexC, ldc_64); + blasStatus = cublasCtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, cuDoubleComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrmm_v2_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* A, int64_t lda, const hipDoubleComplex* B, int64_t ldb, hipDoubleComplex* C, int64_t ldc); + // CHECK: blasStatus = hipblasZtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasZtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); + blasStatus = cublasZtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); + blasStatus = cublasZtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index ffbcfb63..5ad41633 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -3289,6 +3289,34 @@ int main() { // CHECK-NEXT: blasStatus = rocblas_zhemm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZhemm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZhemm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, float* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strmm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, float* C, int64_t ldc); + // CHECK: blasStatus = rocblas_strmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_strmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fC, ldc_64); + blasStatus = cublasStrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fC, ldc_64); + blasStatus = cublasStrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, double* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrmm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, double* C, int64_t ldc); + // CHECK: blasStatus = rocblas_dtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &dC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_dtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &dC, ldc_64); + blasStatus = cublasDtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &dC, ldc_64); + blasStatus = cublasDtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &dC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, cuComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrmm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, const rocblas_float_complex* B, int64_t ldb, rocblas_float_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_ctrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_ctrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexC, ldc_64); + blasStatus = cublasCtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexC, ldc_64); + blasStatus = cublasCtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, cuDoubleComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrmm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* B, int64_t ldb, rocblas_double_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_ztrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_ztrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); + blasStatus = cublasZtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); + blasStatus = cublasZtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); #endif return 0;