From b2265ffe2563be2f880dc503d8f67bae6c1e202c Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 11 Nov 2024 19:38:18 +0100 Subject: [PATCH] [HIPIFY][SWDEV-493184][6.2.0][device][fix] Added missing support for device intrinsics and built-ins that appeared in HIP `6.2.0` + `__all_sync`, `__any_sync`, `__ballot_sync`, `__activemask`, `__match_any_sync`, `__match_all_sync`, `__shfl_sync`, `__shfl_up_sync`, `__shfl_down_sync`, and `__shfl_xor_sync` --- bin/hipify-perl | 16 ++++++--- .../CUDA_Device_API_supported_by_HIP.md | 14 +++++--- src/CUDA2HIP_Device_functions.cpp | 36 ++++++++++++++++--- 3 files changed, 53 insertions(+), 13 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 2f7f923b..e327d367 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -8644,8 +8644,12 @@ sub countSupportedDeviceFunctions { "__short2half_ru", "__short2half_rn", "__short2half_rd", + "__shfl_xor_sync", "__shfl_xor", + "__shfl_up_sync", "__shfl_up", + "__shfl_sync", + "__shfl_down_sync", "__shfl_down", "__shfl", "__saturatef", @@ -8657,6 +8661,8 @@ sub countSupportedDeviceFunctions { "__mulhi", "__mul64hi", "__mul24", + "__match_any_sync", + "__match_all_sync", "__lows2half2", "__lowhigh2highlow", "__low2half2", @@ -8871,11 +8877,15 @@ sub countSupportedDeviceFunctions { "__byte_perm", "__brevll", "__brev", + "__ballot_sync", "__ballot", "__assertfail", "__assert_fail", + "__any_sync", "__any", - "__all" + "__all_sync", + "__all", + "__activemask" ) { # match device function from the list, except those, which have a namespace prefix (aka somenamespace::umin(...)); @@ -9022,10 +9032,6 @@ sub warnUnsupportedDeviceFunctions { "__short2bfloat16_ru", "__short2bfloat16_rn", "__short2bfloat16_rd", - "__shfl_xor_sync", - "__shfl_up_sync", - "__shfl_sync", - "__shfl_down_sync", "__prof_trigger", "__pm3", "__pm2", diff --git a/docs/tables/CUDA_Device_API_supported_by_HIP.md b/docs/tables/CUDA_Device_API_supported_by_HIP.md index 0a3c62ab..50759924 100644 --- a/docs/tables/CUDA_Device_API_supported_by_HIP.md +++ b/docs/tables/CUDA_Device_API_supported_by_HIP.md @@ -5,11 +5,15 @@ |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`_Pow_int`| | | | | | | | | | | +|`__activemask`|9.0| | | |`__activemask`|6.2.0| | | | | |`__all`| | | | |`__all`|1.6.0| | | | | +|`__all_sync`|9.0| | | |`__all_sync`|6.2.0| | | | | |`__any`| | | | |`__any`|1.6.0| | | | | +|`__any_sync`|9.0| | | |`__any_sync`|6.2.0| | | | | |`__assert_fail`| | | | |`__assert_fail`|1.9.0| | | | | |`__assertfail`| | | | |`__assertfail`|1.9.0| | | | | |`__ballot`| | | | |`__ballot`|1.6.0| | | | | +|`__ballot_sync`|9.0| | | |`__ballot_sync`|6.2.0| | | | | |`__bfloat1622float2`|11.0| | | | | | | | | | |`__bfloat162bfloat162`|11.0| | | | | | | | | | |`__bfloat162char_rz`|12.2| | | | | | | | | | @@ -361,6 +365,8 @@ |`__lowhigh2highlow`| | | | |`__lowhigh2highlow`|1.6.0| | | | | |`__lows2bfloat162`|11.0| | | | | | | | | | |`__lows2half2`| | | | |`__lows2half2`|1.6.0| | | | | +|`__match_all_sync`|9.0| | | |`__match_all_sync`|6.2.0| | | | | +|`__match_any_sync`|9.0| | | |`__match_any_sync`|6.2.0| | | | | |`__mul24`| | | | |`__mul24`|1.6.0| | | | | |`__mul64hi`| | | | |`__mul64hi`|1.6.0| | | | | |`__mulhi`| | | | |`__mulhi`|1.6.0| | | | | @@ -387,12 +393,12 @@ |`__saturatef`| | | | |`__saturatef`|1.6.0| | | | | |`__shfl`|7.5|9.0| | |`__shfl`|1.6.0| | | | | |`__shfl_down`|7.5|9.0| | |`__shfl_down`|1.6.0| | | | | -|`__shfl_down_sync`| | | | | | | | | | | -|`__shfl_sync`| | | | | | | | | | | +|`__shfl_down_sync`|9.0| | | |`__shfl_down_sync`|6.2.0| | | | | +|`__shfl_sync`|9.0| | | |`__shfl_sync`|6.2.0| | | | | |`__shfl_up`|7.5|9.0| | |`__shfl_up`|1.6.0| | | | | -|`__shfl_up_sync`| | | | | | | | | | | +|`__shfl_up_sync`|9.0| | | |`__shfl_up_sync`|6.2.0| | | | | |`__shfl_xor`|7.5|9.0| | |`__shfl_xor`|1.6.0| | | | | -|`__shfl_xor_sync`| | | | | | | | | | | +|`__shfl_xor_sync`|9.0| | | |`__shfl_xor_sync`|6.2.0| | | | | |`__short2bfloat16_rd`|11.0| | | | | | | | | | |`__short2bfloat16_rn`|11.0| | | | | | | | | | |`__short2bfloat16_ru`|11.0| | | | | | | | | | diff --git a/src/CUDA2HIP_Device_functions.cpp b/src/CUDA2HIP_Device_functions.cpp index ed2e4a49..469d1e60 100644 --- a/src/CUDA2HIP_Device_functions.cpp +++ b/src/CUDA2HIP_Device_functions.cpp @@ -672,13 +672,13 @@ const std::map CUDA_DEVICE_FUNCTION_MAP { {"h2exp10", {"h2exp10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"h2cos", {"h2cos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"h2sin", {"h2sin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__shfl_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__shfl_sync", {"__shfl_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__shfl", {"__shfl", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, - {"__shfl_up_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__shfl_up_sync", {"__shfl_up_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__shfl_up", {"__shfl_up", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, - {"__shfl_down_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__shfl_down_sync", {"__shfl_down_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__shfl_down", {"__shfl_down", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, - {"__shfl_xor_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__shfl_xor_sync", {"__shfl_xor_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__shfl_xor", {"__shfl_xor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, {"__funnelshift_l", {"__funnelshift_l", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__funnelshift_lc", {"__funnelshift_lc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, @@ -829,6 +829,14 @@ const std::map CUDA_DEVICE_FUNCTION_MAP { {"__nv_cvt_bfloat16raw2_to_fp8x2", {"__hip_cvt_bfloat16raw2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__nv_cvt_fp8_to_halfraw", {"__hip_cvt_fp8_to_halfraw", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__nv_cvt_fp8x2_to_halfraw2", {"__hip_cvt_fp8x2_to_halfraw2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + // intrinsics + {"__all_sync", {"__all_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__any_sync", {"__any_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ballot_sync", {"__ballot_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__activemask", {"__activemask", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + // built-ins + {"__match_any_sync", {"__match_any_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__match_all_sync", {"__match_all_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, }; const std::map CUDA_DEVICE_FUNCTION_VER_MAP { @@ -959,6 +967,16 @@ const std::map CUDA_DEVICE_FUNCTION_VER_MAP { {"make_half2", {CUDA_122, CUDA_0, CUDA_0 }}, {"__half2char_rz", {CUDA_122, CUDA_0, CUDA_0 }}, {"__half2uchar_rz", {CUDA_122, CUDA_0, CUDA_0 }}, + {"__all_sync", {CUDA_90, CUDA_0, CUDA_0 }}, + {"__any_sync", {CUDA_90, CUDA_0, CUDA_0 }}, + {"__ballot_sync", {CUDA_90, CUDA_0, CUDA_0 }}, + {"__activemask", {CUDA_90, CUDA_0, CUDA_0 }}, + {"__match_any_sync", {CUDA_90, CUDA_0, CUDA_0 }}, + {"__match_all_sync", {CUDA_90, CUDA_0, CUDA_0 }}, + {"__shfl_sync", {CUDA_90, CUDA_0, CUDA_0 }}, + {"__shfl_up_sync", {CUDA_90, CUDA_0, CUDA_0 }}, + {"__shfl_down_sync", {CUDA_90, CUDA_0, CUDA_0 }}, + {"__shfl_xor_sync", {CUDA_90, CUDA_0, CUDA_0 }}, }; const std::map HIP_DEVICE_FUNCTION_VER_MAP { @@ -1470,6 +1488,16 @@ const std::map HIP_DEVICE_FUNCTION_VER_MAP { {"__hmax_nan", {HIP_5050, HIP_0, HIP_0 }}, {"__hmin", {HIP_5050, HIP_0, HIP_0 }}, {"__hmin_nan", {HIP_5050, HIP_0, HIP_0 }}, + {"__all_sync", {HIP_6020, HIP_0, HIP_0 }}, + {"__any_sync", {HIP_6020, HIP_0, HIP_0 }}, + {"__ballot_sync", {HIP_6020, HIP_0, HIP_0 }}, + {"__activemask", {HIP_6020, HIP_0, HIP_0 }}, + {"__match_any_sync", {HIP_6020, HIP_0, HIP_0 }}, + {"__match_all_sync", {HIP_6020, HIP_0, HIP_0 }}, + {"__shfl_sync", {HIP_6020, HIP_0, HIP_0 }}, + {"__shfl_up_sync", {HIP_6020, HIP_0, HIP_0 }}, + {"__shfl_down_sync", {HIP_6020, HIP_0, HIP_0 }}, + {"__shfl_xor_sync", {HIP_6020, HIP_0, HIP_0 }}, }; const std::map CUDA_DEVICE_FUNCTION_API_SECTION_MAP {