From 0815f8c32259858ec4b55c1e89faffe343c9b354 Mon Sep 17 00:00:00 2001 From: Pravin Jagtap Date: Sat, 6 Apr 2024 02:35:49 -0400 Subject: [PATCH] AMDGPU: MC support for v_cvt_scalef32_[f16|f32]_fp8 of gfx950. OPSEL ASM Syntax: op_sel:[x,y,z] where, op_sel[x] = Inst{11} = src0_modifier{2} op_sel[y] = Inst{12} = src1_modifier{2} op_sel[z] = Inst{14} = src0_modifier{3} Note: Conventional Inst{13} i.e. OPSEL[2] is ignored in asm syntax. Co-authored-by: Pravin Jagtap --- llvm/lib/Target/AMDGPU/AMDGPU.td | 14 +- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 3 + llvm/lib/Target/AMDGPU/VOP3Instructions.td | 22 ++ llvm/test/MC/AMDGPU/gfx950_asm_features.s | 192 ++++++++++++++++++ .../Disassembler/AMDGPU/gfx950_dasm_vop3.txt | 144 +++++++++++++ 5 files changed, 373 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 2b8b41371fef8b..5a01001fd906d3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -384,11 +384,17 @@ def FeaturePermlane32Swap : SubtargetFeature<"permlane32-swap", "Has v_permlane32_swap_b32 instructions" >; +def FeatureFP8ConversionScaleInsts : SubtargetFeature<"fp8-cvt-scale-insts", + "HasFP8ConversionScaleInsts", + "true", + "Has fp8 conversion scale instructions" +>; + def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts", "GFX950Insts", "true", "Additional instructions for GFX950+", - [FeaturePermlane16Swap, FeaturePermlane32Swap] + [FeaturePermlane16Swap, FeaturePermlane32Swap, FeatureFP8ConversionScaleInsts] >; def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts", @@ -1531,7 +1537,8 @@ def FeatureISAVersion9_5_Common : FeatureSet< FeatureGFX950Insts, FeaturePrngInst, FeatureBF16ConversionInsts, - FeatureBitOp3Insts + FeatureBitOp3Insts, + FeatureFP8ConversionScaleInsts ])>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -2405,6 +2412,9 @@ def HasBitOp3Insts : Predicate<"Subtarget->hasBitOp3Insts()">, def HasPrngInst : Predicate<"Subtarget->hasPrngInst()">, 
AssemblerPredicate<(all_of FeaturePrngInst)>; +def HasFP8ConversionScaleInsts : Predicate<"Subtarget->hasFP8ConversionScaleInsts()">, + AssemblerPredicate<(all_of FeatureFP8ConversionScaleInsts)>; + def HasGDS : Predicate<"Subtarget->hasGDS()">; def HasGWS : Predicate<"Subtarget->hasGWS()">; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index ece26a4adb3754..c5aaed3a6b9ae8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -50,6 +50,7 @@ class AMDGPUSubtarget { bool GCN3Encoding = false; bool Has16BitInsts = false; bool HasTrue16BitInsts = false; + bool HasFP8ConversionScaleInsts = false; bool EnableRealTrue16Insts = false; bool HasBF16ConversionInsts = false; bool HasMadMixInsts = false; @@ -175,6 +176,8 @@ class AMDGPUSubtarget { return HasMadMixInsts; } + bool hasFP8ConversionScaleInsts() const { return HasFP8ConversionScaleInsts; } + bool hasMadMacF32Insts() const { return HasMadMacF32Insts || !isGCN(); } diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 60aaa05d59b40a..2534f26492e6ea 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -874,6 +874,24 @@ class VOP3_BITOP3_Profile : VOP3_Profile let AsmVOP3OpSel = !subst("$op_sel", "$bitop3$op_sel", getAsmVOP3OpSel<3, 0, 0, 0, 0, 0>.ret); } +class VOP3_CVT_SCALE_F1632_FP8BF8_Profile : VOP3_Profile, + VOP3_OPSEL> { + let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, + FP32InputMods:$src1_modifiers, Src1RC64:$src1, + op_sel0:$op_sel); + let HasClamp = 0; + let HasSrc2 = 0; + let HasSrc2Mods = 0; + let HasExtVOP3DPP = 0; + let HasOpSel = 1; + let HasOMod = 0; +} + +let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in { + defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile>; + defm V_CVT_SCALEF32_F32_FP8 : 
VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile>; +} + let SubtargetPredicate = isGFX10Plus in { let isCommutable = 1, isReMaterializable = 1 in { defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile>; @@ -1796,3 +1814,7 @@ defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>; defm V_BITOP3_B16 : VOP3_Real_BITOP3_gfx9<0x233, "v_bitop3_b16">; defm V_BITOP3_B32 : VOP3_Real_BITOP3_gfx9<0x234, "v_bitop3_b32">; +let OtherPredicates = [HasFP8ConversionScaleInsts] in { +defm V_CVT_SCALEF32_F16_FP8 : VOP3OpSel_Real_gfx9 <0x24a>; +defm V_CVT_SCALEF32_F32_FP8 : VOP3OpSel_Real_gfx9 <0x23b>; +} diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s index ad1ce40ddd6a45..55ca8f94600995 100644 --- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s @@ -117,3 +117,195 @@ v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1 // FIXME: Swapped order not accepted // v_permlane32_swap_b32 v1, v2 fi:1 bound_ctrl:1 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4a,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4a,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4a,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,0,1] ; encoding: 
[0x01,0x40,0x4a,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4a,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4a,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4a,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, s1, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x01,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, s1, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, s2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4a,0xd2,0x02,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, s2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, s3, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4a,0xd2,0x03,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, s3, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, s4, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4a,0xd2,0x04,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, s4, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, s1, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x4a,0xd2,0x01,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, s1, v3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, s2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4a,0xd2,0x02,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, s2, v3 op_sel:[1,0,1] + 
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, s3, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4a,0xd2,0x03,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, s3, v3 op_sel:[0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, s4, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4a,0xd2,0x04,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, s4, v3 op_sel:[1,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, 11, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x8b,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, 11, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, 22, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4a,0xd2,0x96,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, 22, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, 33, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4a,0xd2,0xa1,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, 33, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, 44, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4a,0xd2,0xac,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, 44, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, 11, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x4a,0xd2,0x8b,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, 11, v3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, 22, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4a,0xd2,0x96,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, 22, v3 op_sel:[1,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, 33, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4a,0xd2,0xa1,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, 33, v3 op_sel:[0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, 44, v3 
op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4a,0xd2,0xac,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, 44, v3 op_sel:[1,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 ; encoding: [0x01,0x00,0x3b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, s1, v3 ; encoding: [0x01,0x00,0x3b,0xd2,0x01,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, s1, v3 + +// 
NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, s2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3b,0xd2,0x02,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, s2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, s3, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3b,0xd2,0x03,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, s3, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, s4, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3b,0xd2,0x04,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, s4, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, s1, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3b,0xd2,0x01,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, s1, v3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, s2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3b,0xd2,0x02,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, s2, v3 op_sel:[1,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, s3, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3b,0xd2,0x03,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, s3, v3 op_sel:[0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, s4, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3b,0xd2,0x04,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, s4, v3 op_sel:[1,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, 11, v3 ; encoding: [0x01,0x00,0x3b,0xd2,0x8b,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, 11, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, 22, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3b,0xd2,0x96,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, 22, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, 33, v3 
op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3b,0xd2,0xa1,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, 33, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, 44, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3b,0xd2,0xac,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, 44, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, 11, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3b,0xd2,0x8b,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, 11, v3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, 22, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3b,0xd2,0x96,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, 22, v3 op_sel:[1,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, 33, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3b,0xd2,0xa1,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, 33, v3 op_sel:[0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3b,0xd2,0xac,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, 44, v3 op_sel:[1,1,1] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt index b9a86cbc18247c..063fc70e04b5c6 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt @@ -35,3 +35,147 @@ # GFX950: v_bitop3_b16 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x33,0xd2,0x01,0x05,0x0e,0x30] 0x05,0x04,0x33,0xd2,0x01,0x05,0x0e,0x30 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4a,0xd2,0x02,0x07,0x02,0x00] +0x01,0x08,0x4a,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 
op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4a,0xd2,0x02,0x07,0x02,0x00] +0x01,0x10,0x4a,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4a,0xd2,0x02,0x07,0x02,0x00] +0x01,0x18,0x4a,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x4a,0xd2,0x02,0x07,0x02,0x00] +0x01,0x40,0x4a,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4a,0xd2,0x02,0x07,0x02,0x00] +0x01,0x48,0x4a,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4a,0xd2,0x02,0x07,0x02,0x00] +0x01,0x50,0x4a,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4a,0xd2,0x02,0x07,0x02,0x00] +0x01,0x58,0x4a,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, s1, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x01,0x06,0x02,0x00] +0x01,0x00,0x4a,0xd2,0x01,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, s2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4a,0xd2,0x02,0x06,0x02,0x00] +0x01,0x08,0x4a,0xd2,0x02,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, s3, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4a,0xd2,0x03,0x06,0x02,0x00] +0x01,0x10,0x4a,0xd2,0x03,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, s4, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4a,0xd2,0x04,0x06,0x02,0x00] +0x01,0x18,0x4a,0xd2,0x04,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, s1, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x4a,0xd2,0x01,0x06,0x02,0x00] +0x01,0x40,0x4a,0xd2,0x01,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, s2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4a,0xd2,0x02,0x06,0x02,0x00] +0x01,0x48,0x4a,0xd2,0x02,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, s3, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4a,0xd2,0x03,0x06,0x02,0x00] +0x01,0x50,0x4a,0xd2,0x03,0x06,0x02,0x00 + +# GFX950: 
v_cvt_scalef32_f16_fp8 v1, s4, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4a,0xd2,0x04,0x06,0x02,0x00] +0x01,0x58,0x4a,0xd2,0x04,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, 11, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x8b,0x06,0x02,0x00] +0x01,0x00,0x4a,0xd2,0x8b,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, 22, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4a,0xd2,0x96,0x06,0x02,0x00] +0x01,0x08,0x4a,0xd2,0x96,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, 33, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4a,0xd2,0xa1,0x06,0x02,0x00] +0x01,0x10,0x4a,0xd2,0xa1,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, 44, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4a,0xd2,0xac,0x06,0x02,0x00] +0x01,0x18,0x4a,0xd2,0xac,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, 11, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x4a,0xd2,0x8b,0x06,0x02,0x00] +0x01,0x40,0x4a,0xd2,0x8b,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, 22, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4a,0xd2,0x96,0x06,0x02,0x00] +0x01,0x48,0x4a,0xd2,0x96,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, 33, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4a,0xd2,0xa1,0x06,0x02,0x00] +0x01,0x50,0x4a,0xd2,0xa1,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4a,0xd2,0xac,0x06,0x02,0x00] +0x01,0x58,0x4a,0xd2,0xac,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 ; encoding: [0x01,0x00,0x3b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x3b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x08,0x3b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x10,0x3b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x18,0x3b,0xd2,0x02,0x07,0x02,0x00 + 
+# GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x40,0x3b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x48,0x3b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x50,0x3b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x58,0x3b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, s1, v3 ; encoding: [0x01,0x00,0x3b,0xd2,0x01,0x06,0x02,0x00] +0x01,0x00,0x3b,0xd2,0x01,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, s2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3b,0xd2,0x02,0x06,0x02,0x00] +0x01,0x08,0x3b,0xd2,0x02,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, s3, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3b,0xd2,0x03,0x06,0x02,0x00] +0x01,0x10,0x3b,0xd2,0x03,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, s4, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3b,0xd2,0x04,0x06,0x02,0x00] +0x01,0x18,0x3b,0xd2,0x04,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, s1, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3b,0xd2,0x01,0x06,0x02,0x00] +0x01,0x40,0x3b,0xd2,0x01,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, s2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3b,0xd2,0x02,0x06,0x02,0x00] +0x01,0x48,0x3b,0xd2,0x02,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, s3, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3b,0xd2,0x03,0x06,0x02,0x00] +0x01,0x50,0x3b,0xd2,0x03,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, s4, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3b,0xd2,0x04,0x06,0x02,0x00] +0x01,0x58,0x3b,0xd2,0x04,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, 11, v3 ; encoding: [0x01,0x00,0x3b,0xd2,0x8b,0x06,0x02,0x00] 
+0x01,0x00,0x3b,0xd2,0x8b,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, 22, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3b,0xd2,0x96,0x06,0x02,0x00] +0x01,0x08,0x3b,0xd2,0x96,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, 33, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3b,0xd2,0xa1,0x06,0x02,0x00] +0x01,0x10,0x3b,0xd2,0xa1,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, 44, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3b,0xd2,0xac,0x06,0x02,0x00] +0x01,0x18,0x3b,0xd2,0xac,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, 11, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3b,0xd2,0x8b,0x06,0x02,0x00] +0x01,0x40,0x3b,0xd2,0x8b,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, 22, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3b,0xd2,0x96,0x06,0x02,0x00] +0x01,0x48,0x3b,0xd2,0x96,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, 33, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3b,0xd2,0xa1,0x06,0x02,0x00] +0x01,0x50,0x3b,0xd2,0xa1,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3b,0xd2,0xac,0x06,0x02,0x00] +0x01,0x58,0x3b,0xd2,0xac,0x06,0x02,0x00