From 898738cf23cc08d692f9e741d28f65006e7c9e40 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 11 Nov 2024 17:43:54 +0100 Subject: [PATCH] [HIPIFY][6.3.0][BLAS] Sync with `hipBLAS` and `rocBLAS` - Step 15 + `rocblas_(s|d|c|z)dgmm_64` and `hipblas(S|D|C|Z)dgmm(_v2)?_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 16 ++-- docs/tables/CUBLAS_API_supported_by_HIP.md | 8 +- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 8 +- docs/tables/CUBLAS_API_supported_by_ROC.md | 8 +- src/CUDA2HIP_BLAS_API_functions.cpp | 16 +++- .../synthetic/libraries/cublas2hipblas_v2.cu | 23 +++++ .../synthetic/libraries/cublas2rocblas_v2.cu | 83 ++++++++++++------- 7 files changed, 108 insertions(+), 54 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 07e34948..2f7f923b 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1589,6 +1589,7 @@ sub rocSubstitutions { subst("cublasCcopy_v2", "rocblas_ccopy", "library"); subst("cublasCcopy_v2_64", "rocblas_ccopy_64", "library"); subst("cublasCdgmm", "rocblas_cdgmm", "library"); + subst("cublasCdgmm_64", "rocblas_cdgmm_64", "library"); subst("cublasCdotc", "rocblas_cdotc", "library"); subst("cublasCdotc_64", "rocblas_cdotc_64", "library"); subst("cublasCdotc_v2", "rocblas_cdotc", "library"); @@ -1766,6 +1767,7 @@ sub rocSubstitutions { subst("cublasDcopy_v2", "rocblas_dcopy", "library"); subst("cublasDcopy_v2_64", "rocblas_dcopy_64", "library"); subst("cublasDdgmm", "rocblas_ddgmm", "library"); + subst("cublasDdgmm_64", "rocblas_ddgmm_64", "library"); subst("cublasDdot", "rocblas_ddot", "library"); subst("cublasDdot_64", "rocblas_ddot_64", "library"); subst("cublasDdot_v2", "rocblas_ddot", "library"); @@ -1998,6 +2000,7 @@ sub rocSubstitutions { subst("cublasScopy_v2", "rocblas_scopy", "library"); subst("cublasScopy_v2_64", "rocblas_scopy_64", "library"); subst("cublasSdgmm", "rocblas_sdgmm", "library"); + subst("cublasSdgmm_64", "rocblas_sdgmm_64", "library"); subst("cublasSdot", "rocblas_sdot", "library"); subst("cublasSdot_64", "rocblas_sdot_64", "library"); subst("cublasSdot_v2", "rocblas_sdot", "library"); @@ -2155,6 +2158,7 @@ sub rocSubstitutions { subst("cublasZcopy_v2", "rocblas_zcopy", "library"); subst("cublasZcopy_v2_64", "rocblas_zcopy_64", "library"); subst("cublasZdgmm", "rocblas_zdgmm", "library"); + subst("cublasZdgmm_64", "rocblas_zdgmm_64", "library"); subst("cublasZdotc", "rocblas_zdotc", "library"); subst("cublasZdotc_64", "rocblas_zdotc_64", "library"); subst("cublasZdotc_v2", "rocblas_zdotc", "library"); @@ -4383,6 +4387,7 @@ sub simpleSubstitutions { subst("cublasCcopy_v2", "hipblasCcopy_v2", "library"); subst("cublasCcopy_v2_64", "hipblasCcopy_v2_64", "library"); subst("cublasCdgmm", "hipblasCdgmm_v2", "library"); + subst("cublasCdgmm_64", "hipblasCdgmm_v2_64", "library"); subst("cublasCdotc", "hipblasCdotc_v2", "library"); subst("cublasCdotc_64", "hipblasCdotc_v2_64", "library"); subst("cublasCdotc_v2", "hipblasCdotc_v2", "library"); @@ -4564,6 +4569,7 @@ sub simpleSubstitutions { subst("cublasDcopy_v2", "hipblasDcopy", "library"); subst("cublasDcopy_v2_64", "hipblasDcopy_64", "library"); subst("cublasDdgmm", "hipblasDdgmm", "library"); + subst("cublasDdgmm_64", "hipblasDdgmm_64", "library"); subst("cublasDdot", "hipblasDdot", "library"); subst("cublasDdot_64", "hipblasDdot_64", "library"); subst("cublasDdot_v2", "hipblasDdot", "library"); @@ -4810,6 +4816,7 @@ sub simpleSubstitutions { subst("cublasScopy_v2", "hipblasScopy", "library"); subst("cublasScopy_v2_64", "hipblasScopy_64", "library"); subst("cublasSdgmm", "hipblasSdgmm", "library"); + subst("cublasSdgmm_64", "hipblasSdgmm_64", "library"); subst("cublasSdot", "hipblasSdot", "library"); subst("cublasSdot_64", "hipblasSdot_64", "library"); subst("cublasSdot_v2", "hipblasSdot", "library"); @@ -4963,6 +4970,7 @@ sub simpleSubstitutions { subst("cublasZcopy_v2", "hipblasZcopy_v2", "library"); subst("cublasZcopy_v2_64", "hipblasZcopy_v2_64", "library"); subst("cublasZdgmm", "hipblasZdgmm_v2", "library"); + subst("cublasZdgmm_64", "hipblasZdgmm_v2_64", "library"); subst("cublasZdotc", "hipblasZdotc_v2", "library"); subst("cublasZdotc_64", "hipblasZdotc_v2_64", "library"); subst("cublasZdotc_v2", "hipblasZdotc_v2", "library"); @@ -11661,7 +11669,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZmatinvBatched", "cublasZgemm3m_64", "cublasZgemm3m", - "cublasZdgmm_64", "cublasXerbla", "cublasUint8gemmBias", "cublasTSTgemvStridedBatched_64", @@ -11689,7 +11696,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasSetMatrixAsync_64", "cublasSetLoggerCallback", "cublasSetKernelStream", - "cublasSdgmm_64", "cublasRotmgEx", "cublasRotmEx_64", "cublasRotmEx", @@ -11770,7 +11776,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasDmatinvBatched", "cublasDgemmGroupedBatched_64", "cublasDgemmGroupedBatched", - "cublasDdgmm_64", "cublasCtrttp", "cublasCtpttr", "cublasCsyrkEx_64", @@ -11795,7 +11800,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCgemm3mBatched_64", "cublasCgemm3mBatched", "cublasCgemm3m", - "cublasCdgmm_64", "cublasAsumEx_64", "cublasAsumEx", "cublasAlloc", @@ -13883,7 +13887,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZgemm3m_64", "cublasZgemm3m", "cublasZgelsBatched", - "cublasZdgmm_64", "cublasXerbla", "cublasUint8gemmBias", "cublasSwapEx_64", @@ -13908,7 +13911,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSetMatrixAsync_64", "cublasSetLoggerCallback", "cublasSetKernelStream", - "cublasSdgmm_64", "cublasRotmgEx", "cublasRotmEx_64", "cublasRotmEx", @@ -14011,7 +14013,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDgemmGroupedBatched_64", "cublasDgemmGroupedBatched", "cublasDgelsBatched", - "cublasDdgmm_64", "cublasCtrttp", "cublasCtpttr", "cublasCsyrkEx_64", @@ -14040,7 +14041,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCgemm3mBatched", "cublasCgemm3m", "cublasCgelsBatched", - "cublasCdgmm_64", "cublasAsumEx_64", "cublasAsumEx", "cublasAlloc", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 209b342c..fc9eeafb 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1825,7 +1825,7 @@ |`cublasAxpyEx`|8.0| | | |`hipblasAxpyEx_v2`|6.0.0| | | | | |`cublasAxpyEx_64`|12.0| | | |`hipblasAxpyEx_v2_64`|6.2.0| | | | | |`cublasCdgmm`| | | | |`hipblasCdgmm_v2`|6.0.0| | | | | -|`cublasCdgmm_64`|12.0| | | | | | | | | | +|`cublasCdgmm_64`|12.0| | | |`hipblasCdgmm_v2_64`|6.3.0| | | |6.3.0| |`cublasCgeam`| | | | |`hipblasCgeam_v2`|6.0.0| | | | | |`cublasCgeam_64`|12.0| | | |`hipblasCgeam_v2_64`|6.3.0| | | |6.3.0| |`cublasCgelsBatched`| | | | |`hipblasCgelsBatched_v2`|6.0.0| | | | | @@ -1851,7 +1851,7 @@ |`cublasCtrsmBatched_64`|12.0| | | |`hipblasCtrsmBatched_v2_64`|6.3.0| | | |6.3.0| |`cublasCtrttp`| | | | | | | | | | | |`cublasDdgmm`| | | | |`hipblasDdgmm`|3.6.0| | | | | -|`cublasDdgmm_64`|12.0| | | | | | | | | | +|`cublasDdgmm_64`|12.0| | | |`hipblasDdgmm_64`|6.3.0| | | |6.3.0| |`cublasDgeam`| | | | |`hipblasDgeam`|1.8.2| | | | | |`cublasDgeam_64`|12.0| | | |`hipblasDgeam_64`|6.3.0| | | |6.3.0| |`cublasDgelsBatched`| | | | |`hipblasDgelsBatched`|5.4.0| | | | | @@ -1887,7 +1887,7 @@ |`cublasScalEx`|8.0| | | |`hipblasScalEx_v2`|6.0.0| | | | | |`cublasScalEx_64`|12.0| | | |`hipblasScalEx_v2_64`|6.2.0| | | | | |`cublasSdgmm`| | | | |`hipblasSdgmm`|3.6.0| | | | | -|`cublasSdgmm_64`|12.0| | | | | | | | | | +|`cublasSdgmm_64`|12.0| | | |`hipblasSdgmm_64`|6.3.0| | | |6.3.0| |`cublasSgeam`| | | | |`hipblasSgeam`|1.8.2| | | | | |`cublasSgeam_64`|12.0| | | |`hipblasSgeam_64`|6.3.0| | | |6.3.0| |`cublasSgelsBatched`| | | | |`hipblasSgelsBatched`|5.4.0| | | | | @@ -1906,7 +1906,7 @@ |`cublasSwapEx_64`|12.0| | | | | | | | | | |`cublasUint8gemmBias`|8.0| | | | | | | | | | |`cublasZdgmm`| | | | |`hipblasZdgmm_v2`|6.0.0| | | | | -|`cublasZdgmm_64`|12.0| | | | | | | | | | +|`cublasZdgmm_64`|12.0| | | |`hipblasZdgmm_v2_64`|6.3.0| | | |6.3.0| |`cublasZgeam`| | | | |`hipblasZgeam_v2`|6.0.0| | | | | |`cublasZgeam_64`|12.0| | | |`hipblasZgeam_v2_64`|6.3.0| | | |6.3.0| |`cublasZgelsBatched`| | | | |`hipblasZgelsBatched_v2`|6.0.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 3e1caf76..0c847d5d 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1825,7 +1825,7 @@ |`cublasAxpyEx`|8.0| | | |`hipblasAxpyEx_v2`|6.0.0| | | | |`rocblas_axpy_ex`|3.9.0| | | | | |`cublasAxpyEx_64`|12.0| | | |`hipblasAxpyEx_v2_64`|6.2.0| | | | |`rocblas_axpy_ex_64`|6.1.0| | | | | |`cublasCdgmm`| | | | |`hipblasCdgmm_v2`|6.0.0| | | | |`rocblas_cdgmm`|3.5.0| | | | | -|`cublasCdgmm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCdgmm_64`|12.0| | | |`hipblasCdgmm_v2_64`|6.3.0| | | |6.3.0|`rocblas_cdgmm_64`|6.3.0| | | |6.3.0| |`cublasCgeam`| | | | |`hipblasCgeam_v2`|6.0.0| | | | |`rocblas_cgeam`|3.5.0| | | | | |`cublasCgeam_64`|12.0| | | |`hipblasCgeam_v2_64`|6.3.0| | | |6.3.0|`rocblas_cgeam_64`|6.3.0| | | |6.3.0| |`cublasCgelsBatched`| | | | |`hipblasCgelsBatched_v2`|6.0.0| | | | | | | | | | | @@ -1851,7 +1851,7 @@ |`cublasCtrsmBatched_64`|12.0| | | |`hipblasCtrsmBatched_v2_64`|6.3.0| | | |6.3.0|`rocblas_ctrsm_batched_64`|6.2.0| | | | | |`cublasCtrttp`| | | | | | | | | | | | | | | | | |`cublasDdgmm`| | | | |`hipblasDdgmm`|3.6.0| | | | |`rocblas_ddgmm`|3.5.0| | | | | -|`cublasDdgmm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDdgmm_64`|12.0| | | |`hipblasDdgmm_64`|6.3.0| | | |6.3.0|`rocblas_ddgmm_64`|6.3.0| | | |6.3.0| |`cublasDgeam`| | | | |`hipblasDgeam`|1.8.2| | | | |`rocblas_dgeam`|1.6.4| | | | | |`cublasDgeam_64`|12.0| | | |`hipblasDgeam_64`|6.3.0| | | |6.3.0|`rocblas_dgeam_64`|6.3.0| | | |6.3.0| |`cublasDgelsBatched`| | | | |`hipblasDgelsBatched`|5.4.0| | | | | | | | | | | @@ -1887,7 +1887,7 @@ |`cublasScalEx`|8.0| | | |`hipblasScalEx_v2`|6.0.0| | | | |`rocblas_scal_ex`|4.0.0| | | | | |`cublasScalEx_64`|12.0| | | |`hipblasScalEx_v2_64`|6.2.0| | | | |`rocblas_scal_ex_64`|6.1.0| | | | | |`cublasSdgmm`| | | | |`hipblasSdgmm`|3.6.0| | | | |`rocblas_sdgmm`|3.5.0| | | | | -|`cublasSdgmm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSdgmm_64`|12.0| | | |`hipblasSdgmm_64`|6.3.0| | | |6.3.0|`rocblas_sdgmm_64`|6.3.0| | | |6.3.0| |`cublasSgeam`| | | | |`hipblasSgeam`|1.8.2| | | | |`rocblas_sgeam`|1.6.4| | | | | |`cublasSgeam_64`|12.0| | | |`hipblasSgeam_64`|6.3.0| | | |6.3.0|`rocblas_sgeam_64`|6.3.0| | | |6.3.0| |`cublasSgelsBatched`| | | | |`hipblasSgelsBatched`|5.4.0| | | | | | | | | | | @@ -1906,7 +1906,7 @@ |`cublasSwapEx_64`|12.0| | | | | | | | | | | | | | | | |`cublasUint8gemmBias`|8.0| | | | | | | | | | | | | | | | |`cublasZdgmm`| | | | |`hipblasZdgmm_v2`|6.0.0| | | | |`rocblas_zdgmm`|3.5.0| | | | | -|`cublasZdgmm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZdgmm_64`|12.0| | | |`hipblasZdgmm_v2_64`|6.3.0| | | |6.3.0|`rocblas_zdgmm_64`|6.3.0| | | |6.3.0| |`cublasZgeam`| | | | |`hipblasZgeam_v2`|6.0.0| | | | |`rocblas_zgeam`|3.5.0| | | | | |`cublasZgeam_64`|12.0| | | |`hipblasZgeam_v2_64`|6.3.0| | | |6.3.0|`rocblas_zgeam_64`|6.3.0| | | |6.3.0| |`cublasZgelsBatched`| | | | |`hipblasZgelsBatched_v2`|6.0.0| | | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index fdcbc8e2..12a42598 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1825,7 +1825,7 @@ |`cublasAxpyEx`|8.0| | | |`rocblas_axpy_ex`|3.9.0| | | | | |`cublasAxpyEx_64`|12.0| | | |`rocblas_axpy_ex_64`|6.1.0| | | | | |`cublasCdgmm`| | | | |`rocblas_cdgmm`|3.5.0| | | | | -|`cublasCdgmm_64`|12.0| | | | | | | | | | +|`cublasCdgmm_64`|12.0| | | |`rocblas_cdgmm_64`|6.3.0| | | |6.3.0| |`cublasCgeam`| | | | |`rocblas_cgeam`|3.5.0| | | | | |`cublasCgeam_64`|12.0| | | |`rocblas_cgeam_64`|6.3.0| | | |6.3.0| |`cublasCgelsBatched`| | | | | | | | | | | @@ -1851,7 +1851,7 @@ |`cublasCtrsmBatched_64`|12.0| | | |`rocblas_ctrsm_batched_64`|6.2.0| | | | | |`cublasCtrttp`| | | | | | | | | | | |`cublasDdgmm`| | | | |`rocblas_ddgmm`|3.5.0| | | | | -|`cublasDdgmm_64`|12.0| | | | | | | | | | +|`cublasDdgmm_64`|12.0| | | |`rocblas_ddgmm_64`|6.3.0| | | |6.3.0| |`cublasDgeam`| | | | |`rocblas_dgeam`|1.6.4| | | | | |`cublasDgeam_64`|12.0| | | |`rocblas_dgeam_64`|6.3.0| | | |6.3.0| |`cublasDgelsBatched`| | | | | | | | | | | @@ -1887,7 +1887,7 @@ |`cublasScalEx`|8.0| | | |`rocblas_scal_ex`|4.0.0| | | | | |`cublasScalEx_64`|12.0| | | |`rocblas_scal_ex_64`|6.1.0| | | | | |`cublasSdgmm`| | | | |`rocblas_sdgmm`|3.5.0| | | | | -|`cublasSdgmm_64`|12.0| | | | | | | | | | +|`cublasSdgmm_64`|12.0| | | |`rocblas_sdgmm_64`|6.3.0| | | |6.3.0| |`cublasSgeam`| | | | |`rocblas_sgeam`|1.6.4| | | | | |`cublasSgeam_64`|12.0| | | |`rocblas_sgeam_64`|6.3.0| | | |6.3.0| |`cublasSgelsBatched`| | | | | | | | | | | @@ -1906,7 +1906,7 @@ |`cublasSwapEx_64`|12.0| | | | | | | | | | |`cublasUint8gemmBias`|8.0| | | | | | | | | | |`cublasZdgmm`| | | | |`rocblas_zdgmm`|3.5.0| | | | | -|`cublasZdgmm_64`|12.0| | | | | | | | | | +|`cublasZdgmm_64`|12.0| | | |`rocblas_zdgmm_64`|6.3.0| | | |6.3.0| |`cublasZgeam`| | | | |`rocblas_zgeam`|3.5.0| | | | | |`cublasZgeam_64`|12.0| | | |`rocblas_zgeam_64`|6.3.0| | | |6.3.0| |`cublasZgelsBatched`| | | | | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 7ad123f1..8c6d609f 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -618,13 +618,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // DGMM {"cublasSdgmm", {"hipblasSdgmm", "rocblas_sdgmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasSdgmm_64", {"hipblasSdgmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, + {"cublasSdgmm_64", {"hipblasSdgmm_64", "rocblas_sdgmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, {"cublasDdgmm", {"hipblasDdgmm", "rocblas_ddgmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasDdgmm_64", {"hipblasDdgmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, + {"cublasDdgmm_64", {"hipblasDdgmm_64", "rocblas_ddgmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, {"cublasCdgmm", {"hipblasCdgmm_v2", "rocblas_cdgmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasCdgmm_64", {"hipblasCdgmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, + {"cublasCdgmm_64", {"hipblasCdgmm_v2_64", "rocblas_cdgmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, {"cublasZdgmm", {"hipblasZdgmm_v2", "rocblas_zdgmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasZdgmm_64", {"hipblasZdgmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, + {"cublasZdgmm_64", {"hipblasZdgmm_v2_64", "rocblas_zdgmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, // TPTTR - Triangular Pack format to Triangular format {"cublasStpttr", {"hipblasStpttr", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, @@ -2078,6 +2078,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasDtrsmBatched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasCtrsmBatched_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasZtrsmBatched_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSdgmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDdgmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCdgmm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZdgmm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2513,6 +2517,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dtrmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_ctrmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_ztrmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_sdgmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_ddgmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_cdgmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_zdgmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 014a8104..6678c6d5 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -231,6 +231,7 @@ int main() { float fy1 = 0.0f; float fc = 0.0f; float fC = 0.0f; + float fCP = 0.0f; float fs = 0.0f; float fd1 = 0.0f; float fd2 = 0.0f; @@ -251,6 +252,7 @@ int main() { double da = 0.0f; double dA = 0.0f; + double dAP = 0.0f; double db = 0.0f; double dB = 0.0f; double dx = 0.0f; @@ -259,6 +261,7 @@ int main() { double dy1 = 0.0f; double dc = 0.0f; double dC = 0.0f; + double dCP = 0.0f; double ds = 0.0f; double dd1 = 0.0f; double dd2 = 0.0f; @@ -3160,6 +3163,26 @@ int main() { // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsmBatched_v2_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* const AP[], int64_t lda, hipDoubleComplex* const BP[], int64_t ldb, int64_t batchCount); // CHECK: blasStatus = hipblasZtrsmBatched_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, dcomplexAarray_const, lda_64, dcomplexBarray, ldb_64, batchCount_64); blasStatus = cublasZtrsmBatched_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, dcomplexAarray_const, lda_64, dcomplexBarray, ldb_64, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdgmm_64(cublasHandle_t handle, cublasSideMode_t mode, int64_t m, int64_t n, const float* A, int64_t lda, const float* x, int64_t incx, float* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSdgmm_64(hipblasHandle_t handle, hipblasSideMode_t side, int64_t m, int64_t n, const float* AP, int64_t lda, const float* x, int64_t incx, float* CP, int64_t ldc); + // CHECK: blasStatus = hipblasSdgmm_64(blasHandle, blasSideMode, m_64, n_64, &fAP, lda_64, &fx, incx_64, &fCP, ldc_64); + blasStatus = cublasSdgmm_64(blasHandle, blasSideMode, m_64, n_64, &fAP, lda_64, &fx, incx_64, &fCP, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDdgmm_64(cublasHandle_t handle, cublasSideMode_t mode, int64_t m, int64_t n, const double* A, int64_t lda, const double* x, int64_t incx, double* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDdgmm_64(hipblasHandle_t handle, hipblasSideMode_t side, int64_t m, int64_t n, const double* AP, int64_t lda, const double* x, int64_t incx, double* CP, int64_t ldc); + // CHECK: blasStatus = hipblasDdgmm_64(blasHandle, blasSideMode, m_64, n_64, &dAP, lda_64, &dx, incx_64, &dCP, ldc_64); + blasStatus = cublasDdgmm_64(blasHandle, blasSideMode, m_64, n_64, &dAP, lda_64, &dx, incx_64, &dCP, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdgmm_64(cublasHandle_t handle, cublasSideMode_t mode, int64_t m, int64_t n, const cuComplex* A, int64_t lda, const cuComplex* x, int64_t incx, cuComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCdgmm_v2_64(hipblasHandle_t handle, hipblasSideMode_t side, int64_t m, int64_t n, const hipComplex* AP, int64_t lda, const hipComplex* x, int64_t incx, hipComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasCdgmm_v2_64(blasHandle, blasSideMode, m_64, n_64, &complexA, lda_64, &complexx, incx_64, &complexC, ldc_64); + blasStatus = cublasCdgmm_64(blasHandle, blasSideMode, m_64, n_64, &complexA, lda_64, &complexx, incx_64, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdgmm_64(cublasHandle_t handle, cublasSideMode_t mode, int64_t m, int64_t n, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* x, int64_t incx, cuDoubleComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZdgmm_v2_64(hipblasHandle_t handle, hipblasSideMode_t side, int64_t m, int64_t n, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* x, int64_t incx, hipDoubleComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasZdgmm_v2_64(blasHandle, blasSideMode, m_64, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexC, ldc_64); + blasStatus = cublasZdgmm_64(blasHandle, blasSideMode, m_64, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexC, ldc_64); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index 5ad41633..b301e719 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -246,22 +246,23 @@ int main() { // CHECK: blasStatus = rocblas_get_matrix_async(rows, cols, num, image, incx, image_2, incy, stream); blasStatus = cublasGetMatrixAsync(rows, cols, num, image, incx, image_2, incy, stream); - float fa = 0; - float fA = 0; + float fa = 0.0f; + float fA = 0.0f; float fAP = 0.0f; - float fb = 0; - float fB = 0; - float fx = 0; - float fx1 = 0; - float fy = 0; - float fy1 = 0; - float fc = 0; - float fC = 0; - float fs = 0; - float fd1 = 0; - float fd2 = 0; - float fresult = 0; - float fparam = 0; + float fb = 0.0f; + float fB = 0.0f; + float fx = 0.0f; + float fx1 = 0.0f; + float fy = 0.0f; + float fy1 = 0.0f; + float fc = 0.0f; + float fC = 0.0f; + float fCP = 0.0f; + float fs = 0.0f; + float fd1 = 0.0f; + float fd2 = 0.0f; + float fresult = 0.0f; + float fparam = 0.0f; float** fAarray = nullptr; const float** const fAarray_const = const_cast(fAarray); @@ -275,21 +276,23 @@ int main() { const float** const fCarray_const = const_cast(fCarray); float** fTauarray = nullptr; - double da = 0; - double dA = 0; - double db = 0; - double dB = 0; - double dx = 0; - double dx1 = 0; - double dy = 0; - double dy1 = 0; - double dc = 0; - double dC = 0; - double ds = 0; - double dd1 = 0; - double dd2 = 0; - double dresult = 0; - double dparam = 0; + double da = 0.0f; + double dA = 0.0f; + double dAP = 0.0f; + double db = 0.0f; + double dB = 0.0f; + double dx = 0.0f; + double dx1 = 0.0f; + double dy = 0.0f; + double dy1 = 0.0f; + double dc = 0.0f; + double dC = 0.0f; + double dCP = 0.0f; + double ds = 0.0f; + double dd1 = 0.0f; + double dd2 = 0.0f; + double dresult = 0.0f; + double dparam = 0.0f; double** dAarray = nullptr; const double** const dAarray_const = const_cast(dAarray); @@ -3317,6 +3320,26 @@ int main() { // CHECK-NEXT: blasStatus = rocblas_ztrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); blasStatus = cublasZtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); blasStatus = cublasZtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdgmm_64(cublasHandle_t handle, cublasSideMode_t mode, int64_t m, int64_t n, const float* A, int64_t lda, const float* x, int64_t incx, float* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sdgmm_64(rocblas_handle handle, rocblas_side side, int64_t m, int64_t n, const float* A, int64_t lda, const float* x, int64_t incx, float* C, int64_t ldc); + // CHECK: blasStatus = rocblas_sdgmm_64(blasHandle, blasSideMode, m_64, n_64, &fAP, lda_64, &fx, incx_64, &fCP, ldc_64); + blasStatus = cublasSdgmm_64(blasHandle, blasSideMode, m_64, n_64, &fAP, lda_64, &fx, incx_64, &fCP, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDdgmm_64(cublasHandle_t handle, cublasSideMode_t mode, int64_t m, int64_t n, const double* A, int64_t lda, const double* x, int64_t incx, double* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ddgmm_64(rocblas_handle handle, rocblas_side side, int64_t m, int64_t n, const double* A, int64_t lda, const double* x, int64_t incx, double* C, int64_t ldc); + // CHECK: blasStatus = rocblas_ddgmm_64(blasHandle, blasSideMode, m_64, n_64, &dAP, lda_64, &dx, incx_64, &dCP, ldc_64); + blasStatus = cublasDdgmm_64(blasHandle, blasSideMode, m_64, n_64, &dAP, lda_64, &dx, incx_64, &dCP, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdgmm_64(cublasHandle_t handle, cublasSideMode_t mode, int64_t m, int64_t n, const cuComplex* A, int64_t lda, const cuComplex* x, int64_t incx, cuComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cdgmm_64(rocblas_handle handle, rocblas_side side, int64_t m, int64_t n, const rocblas_float_complex* A, int64_t lda, const rocblas_float_complex* x, int64_t incx, rocblas_float_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_cdgmm_64(blasHandle, blasSideMode, m_64, n_64, &complexA, lda_64, &complexx, incx_64, &complexC, ldc_64); + blasStatus = cublasCdgmm_64(blasHandle, blasSideMode, m_64, n_64, &complexA, lda_64, &complexx, incx_64, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdgmm_64(cublasHandle_t handle, cublasSideMode_t mode, int64_t m, int64_t n, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* x, int64_t incx, cuDoubleComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zdgmm_64(rocblas_handle handle, rocblas_side side, int64_t m, int64_t n, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* x, int64_t incx, rocblas_double_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_zdgmm_64(blasHandle, blasSideMode, m_64, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexC, ldc_64); + blasStatus = cublasZdgmm_64(blasHandle, blasSideMode, m_64, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexC, ldc_64); #endif return 0;