diff --git a/bin/hipify-perl b/bin/hipify-perl index f5a15285..d3797d6a 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -6144,6 +6144,16 @@ sub simpleSubstitutions { subst("__half2_raw", "__half2_raw", "device_type"); subst("__half_raw", "__half_raw", "device_type"); subst("__nv_bfloat16", "hip_bfloat16", "device_type"); + subst("__nv_fp8_e4m3", "__hip_fp8_e4m3_fnuz", "device_type"); + subst("__nv_fp8_e5m2", "__hip_fp8_e5m2_fnuz", "device_type"); + subst("__nv_fp8_interpretation_t", "__hip_fp8_interpretation_t", "device_type"); + subst("__nv_fp8_storage_t", "__hip_fp8_storage_t", "device_type"); + subst("__nv_fp8x2_e4m3", "__hip_fp8x2_e4m3_fnuz", "device_type"); + subst("__nv_fp8x2_e5m2", "__hip_fp8x2_e5m2_fnuz", "device_type"); + subst("__nv_fp8x2_storage_t", "__hip_fp8x2_storage_t", "device_type"); + subst("__nv_fp8x4_e4m3", "__hip_fp8x4_e4m3_fnuz", "device_type"); + subst("__nv_fp8x4_storage_t", "__hip_fp8x4_storage_t", "device_type"); + subst("__nv_saturation_t", "__hip_saturation_t", "device_type"); subst("caffe2\/core\/common_cudnn.h", "caffe2\/core\/hip\/common_miopen.h", "include"); subst("caffe2\/operators\/spatial_batch_norm_op.h", "caffe2\/operators\/hip\/spatial_batch_norm_op_miopen.hip", "include"); subst("channel_descriptor.h", "hip\/channel_descriptor.h", "include"); @@ -7693,6 +7703,11 @@ sub simpleSubstitutions { subst("NVRTC_ERROR_OUT_OF_MEMORY", "HIPRTC_ERROR_OUT_OF_MEMORY", "numeric_literal"); subst("NVRTC_ERROR_PROGRAM_CREATION_FAILURE", "HIPRTC_ERROR_PROGRAM_CREATION_FAILURE", "numeric_literal"); subst("NVRTC_SUCCESS", "HIPRTC_SUCCESS", "numeric_literal"); + subst("__NV_E4M3", "__HIP_E4M3_FNUZ", "numeric_literal"); + subst("__NV_E5M2", "__HIP_E5M2_FNUZ", "numeric_literal"); + subst("__NV_NOSAT", "__HIP_NOSAT", "numeric_literal"); + subst("__NV_SATFINITE", "__HIP_SATFINITE", "numeric_literal"); + subst("__nv_fp8x4_e5m2", "__hip_fp8x4_e5m2_fnuz", "numeric_literal"); subst("cublasLtOrder_t", "hipblasLtOrder_t", "numeric_literal"); subst("cudaAccessPropertyNormal", "hipAccessPropertyNormal", "numeric_literal"); subst("cudaAccessPropertyPersisting", "hipAccessPropertyPersisting", "numeric_literal"); @@ -10384,24 +10399,9 @@ sub warnUnsupportedFunctions { "cl_event_flags", "cl_context_flags_enum", "cl_context_flags", - "__nv_saturation_t", - "__nv_fp8x4_storage_t", - "__nv_fp8x4_e5m2", - "__nv_fp8x4_e4m3", - "__nv_fp8x2_storage_t", - "__nv_fp8x2_e5m2", - "__nv_fp8x2_e4m3", - "__nv_fp8_storage_t", - "__nv_fp8_interpretation_t", - "__nv_fp8_e5m2", - "__nv_fp8_e4m3", "__nv_bfloat16_raw", "__nv_bfloat162_raw", "__nv_bfloat162", - "__NV_SATFINITE", - "__NV_NOSAT", - "__NV_E5M2", - "__NV_E4M3", "__CUB_LP64__", "_CUB_ASM_PTR_SIZE_", "_CUB_ASM_PTR_", diff --git a/docs/tables/CUDA_Device_API_supported_by_HIP.md b/docs/tables/CUDA_Device_API_supported_by_HIP.md index 50759924..6ccbff4f 100644 --- a/docs/tables/CUDA_Device_API_supported_by_HIP.md +++ b/docs/tables/CUDA_Device_API_supported_by_HIP.md @@ -809,10 +809,10 @@ |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| -|`__NV_E4M3`|11.8| | | | | | | | | | -|`__NV_E5M2`|11.8| | | | | | | | | | -|`__NV_NOSAT`|11.8| | | | | | | | | | -|`__NV_SATFINITE`|11.8| | | | | | | | | | +|`__NV_E4M3`|11.8| | | |`__HIP_E4M3_FNUZ`|6.2.0| | | | | +|`__NV_E5M2`|11.8| | | |`__HIP_E5M2_FNUZ`|6.2.0| | | | | +|`__NV_NOSAT`|11.8| | | |`__HIP_NOSAT`|6.2.0| | | | | +|`__NV_SATFINITE`|11.8| | | |`__HIP_SATFINITE`|6.2.0| | | | | |`__half`| | | | |`__half`|1.6.0| | | | | |`__half2`| | | | |`__half2`|1.6.0| | | | | |`__half2_raw`| | | | |`__half2_raw`|1.9.0| | | | | @@ -821,17 +821,17 @@ |`__nv_bfloat162`|11.0| | | | | | | | | | |`__nv_bfloat162_raw`|11.0| | | | | | | | | | |`__nv_bfloat16_raw`|11.0| | | | | | | | | | -|`__nv_fp8_e4m3`|11.8| | | | | | | | | | -|`__nv_fp8_e5m2`|11.8| | | | | | | | | | -|`__nv_fp8_interpretation_t`|11.8| | | | | | | | | | -|`__nv_fp8_storage_t`|11.8| | | | | | | | | | -|`__nv_fp8x2_e4m3`|11.8| | | | | | | | | | -|`__nv_fp8x2_e5m2`|11.8| | | | | | | | | | -|`__nv_fp8x2_storage_t`|11.8| | | | | | | | | | -|`__nv_fp8x4_e4m3`|11.8| | | | | | | | | | -|`__nv_fp8x4_e5m2`|11.8| | | | | | | | | | -|`__nv_fp8x4_storage_t`|11.8| | | | | | | | | | -|`__nv_saturation_t`|11.8| | | | | | | | | | +|`__nv_fp8_e4m3`|11.8| | | |`__hip_fp8_e4m3_fnuz`|6.2.0| | | | | +|`__nv_fp8_e5m2`|11.8| | | |`__hip_fp8_e5m2_fnuz`|6.2.0| | | | | +|`__nv_fp8_interpretation_t`|11.8| | | |`__hip_fp8_interpretation_t`|6.2.0| | | | | +|`__nv_fp8_storage_t`|11.8| | | |`__hip_fp8_storage_t`|6.2.0| | | | | +|`__nv_fp8x2_e4m3`|11.8| | | |`__hip_fp8x2_e4m3_fnuz`|6.2.0| | | | | +|`__nv_fp8x2_e5m2`|11.8| | | |`__hip_fp8x2_e5m2_fnuz`|6.2.0| | | | | +|`__nv_fp8x2_storage_t`|11.8| | | |`__hip_fp8x2_storage_t`|6.2.0| | | | | +|`__nv_fp8x4_e4m3`|11.8| | | |`__hip_fp8x4_e4m3_fnuz`|6.2.0| | | | | +|`__nv_fp8x4_e5m2`|11.8| | | |`__hip_fp8x4_e5m2_fnuz`|6.2.0| | | | | +|`__nv_fp8x4_storage_t`|11.8| | | |`__hip_fp8x4_storage_t`|6.2.0| | | | | +|`__nv_saturation_t`|11.8| | | |`__hip_saturation_t`|6.2.0| | | | | |`nv_bfloat162`|11.0| | | | | | | | | | diff --git a/src/CUDA2HIP_Device_types.cpp b/src/CUDA2HIP_Device_types.cpp index 480c10c0..b06658ec 100644 --- a/src/CUDA2HIP_Device_types.cpp +++ b/src/CUDA2HIP_Device_types.cpp @@ -36,21 +36,21 @@ const std::map CUDA_DEVICE_TYPE_NAME_MAP { {"nv_bfloat162", {"hip_bfloat162", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, {"__nv_bfloat162_raw", {"__hip_bfloat162_raw", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, // float8 Precision Device types - {"__nv_fp8_storage_t", {"__hip_fp8_storage_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, - {"__nv_fp8x2_storage_t", {"__hip_fp8x2_storage_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, - {"__nv_fp8x4_storage_t", {"__hip_fp8x4_storage_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, - {"__nv_fp8_e5m2", {"__hip_fp8_e5m2", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, - {"__nv_fp8x2_e5m2", {"__hip_fp8x2_e5m2", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, - {"__nv_fp8_e4m3", {"__hip_fp8_e4m3", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, - {"__nv_fp8x2_e4m3", {"__hip_fp8x2_e4m3", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, - {"__nv_fp8x4_e4m3", {"__hip_fp8x4_e4m3", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, - {"__nv_saturation_t", {"__hip_saturation_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, - {"__NV_NOSAT", {"__HIP_NOSAT", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2, UNSUPPORTED}}, - {"__NV_SATFINITE", {"__HIP_SATFINITE", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2, UNSUPPORTED}}, - {"__nv_fp8_interpretation_t", {"__hip_fp8_interpretation_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, - {"__NV_E4M3", {"__HIP_E4M3", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2, UNSUPPORTED}}, - {"__NV_E5M2", {"__HIP_E5M2", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2, UNSUPPORTED}}, - {"__nv_fp8x4_e5m2", {"__hip_fp8x4_e5m2", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2, UNSUPPORTED}}, + {"__nv_fp8_storage_t", {"__hip_fp8_storage_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}}, + {"__nv_fp8x2_storage_t", {"__hip_fp8x2_storage_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}}, + {"__nv_fp8x4_storage_t", {"__hip_fp8x4_storage_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}}, + {"__nv_fp8_e5m2", {"__hip_fp8_e5m2_fnuz", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}}, + {"__nv_fp8x2_e5m2", {"__hip_fp8x2_e5m2_fnuz", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}}, + {"__nv_fp8_e4m3", {"__hip_fp8_e4m3_fnuz", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}}, + {"__nv_fp8x2_e4m3", {"__hip_fp8x2_e4m3_fnuz", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}}, + {"__nv_fp8x4_e4m3", {"__hip_fp8x4_e4m3_fnuz", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}}, + {"__nv_saturation_t", {"__hip_saturation_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}}, + {"__NV_NOSAT", {"__HIP_NOSAT", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2}}, + {"__NV_SATFINITE", {"__HIP_SATFINITE", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2}}, + {"__nv_fp8_interpretation_t", {"__hip_fp8_interpretation_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}}, + {"__NV_E4M3", {"__HIP_E4M3_FNUZ", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2}}, + {"__NV_E5M2", {"__HIP_E5M2_FNUZ", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2}}, + {"__nv_fp8x4_e5m2", {"__hip_fp8x4_e5m2_fnuz", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2}}, }; const std::map CUDA_DEVICE_TYPE_NAME_VER_MAP { @@ -83,6 +83,21 @@ const std::map HIP_DEVICE_TYPE_NAME_VER_MAP { {"__half_raw", {HIP_1090, HIP_0, HIP_0 }}, {"__half2_raw", {HIP_1090, HIP_0, HIP_0 }}, {"hip_bfloat16", {HIP_3050, HIP_0, HIP_0 }}, + {"__hip_fp8_e4m3_fnuz", {HIP_6020, HIP_0, HIP_0 }}, + {"__hip_fp8_storage_t", {HIP_6020, HIP_0, HIP_0 }}, + {"__hip_fp8x2_storage_t", {HIP_6020, HIP_0, HIP_0 }}, + {"__hip_fp8x4_storage_t", {HIP_6020, HIP_0, HIP_0 }}, + {"__hip_fp8_e5m2_fnuz", {HIP_6020, HIP_0, HIP_0 }}, + {"__hip_fp8x2_e5m2_fnuz", {HIP_6020, HIP_0, HIP_0 }}, + {"__hip_fp8x2_e4m3_fnuz", {HIP_6020, HIP_0, HIP_0 }}, + {"__hip_fp8x4_e4m3_fnuz", {HIP_6020, HIP_0, HIP_0 }}, + {"__hip_saturation_t", {HIP_6020, HIP_0, HIP_0 }}, + {"__HIP_NOSAT", {HIP_6020, HIP_0, HIP_0 }}, + {"__HIP_SATFINITE", {HIP_6020, HIP_0, HIP_0 }}, + {"__hip_fp8_interpretation_t", {HIP_6020, HIP_0, HIP_0 }}, + {"__HIP_E4M3_FNUZ", {HIP_6020, HIP_0, HIP_0 }}, + {"__HIP_E5M2_FNUZ", {HIP_6020, HIP_0, HIP_0 }}, + {"__hip_fp8x4_e5m2_fnuz", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_half", {HIP_1050, HIP_0, HIP_0 }}, {"rocblas_bfloat16", {HIP_3050, HIP_0, HIP_0 }},