AMDGPU: MC support for v_cvt_scalef32_sr_{bf8|fp8}_{f16|bf16|f32} #117797

arsenm · 2024-11-26T21:28:55Z

Co-authored-by: Shilei Tian <[email protected]>

llvmbot · 2024-11-26T21:34:45Z

@llvm/pr-subscribers-mc

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Co-authored-by: Shilei Tian <[email protected]>

Full diff: https://github.com/llvm/llvm-project/pull/117797.diff

5 Files Affected:

(modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+3)
(modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+20)
(modified) llvm/test/MC/AMDGPU/gfx950_asm_features.s (+120)
(modified) llvm/test/MC/AMDGPU/gfx950_err.s (+18)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt (+90)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 0938a11077cfb1..119a4d63704777 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2852,6 +2852,9 @@ def VOP_I32_V2F16_F32_F32 : VOPProfile<[i32, v2f16, f32, f32]>;
 def VOP_I32_V2BF16_F32_F32: VOPProfile<[i32, v2bf16, f32, f32]>;
 def VOP_BF16_F32_I32 : VOPProfile<[bf16, f32, i32, untyped]>;
 def VOP_F16_F32_I32 : VOPProfile<[f16, f32, i32, untyped]>;
+def VOP_I32_BF16_I32_F32 : VOPProfile<[i32, bf16, i32, f32]>;
+def VOP_I32_F16_I32_F32 : VOPProfile<[i32, f16, i32, f32]>;
+def VOP_I32_F32_I32_F32 : VOPProfile<[i32, f32, i32, f32]>;
 
 def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
 def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 2c441910fe21a8..9882f1f2a16780 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -967,6 +967,14 @@ class VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<VOPProfile P> : VOP3_Profil
   let HasOMod = 0;
 }
 
+class VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOPProfile P> : VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<P> {
+  let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
+                          Int32InputMods:$src1_modifiers, Src1RC64:$src1,
+                          FP32InputMods:$src2_modifiers, Src2RC64:$src2,
+                          VGPR_32:$vdst_in, op_sel0:$op_sel);
+}
+
+
 class VOP3_CVT_SCALE_FP4_F16BF16_TiedInput_Profile<VOPProfile P> : VOP3_Profile<P, VOP3_OPSEL> {
   let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
                           FP32InputMods:$src1_modifiers, Src1RC64:$src1,
@@ -1046,6 +1054,9 @@ class VOP3_CVT_SCALEF32_PK_F864_Profile<VOPProfile P> : VOP3_Profile<P> {
 
 let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
   let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
+    defm V_CVT_SCALEF32_SR_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_sr_fp8_bf16", VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOP_I32_BF16_I32_F32>>;
+    defm V_CVT_SCALEF32_SR_FP8_F16 : VOP3Inst<"v_cvt_scalef32_sr_fp8_f16", VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOP_I32_F16_I32_F32>>;
+    defm V_CVT_SCALEF32_SR_FP8_F32 : VOP3Inst<"v_cvt_scalef32_sr_fp8_f32", VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOP_I32_F32_I32_F32>>;
     defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_TiedInput_Profile<VOP_V2F16_I32_F32>>;
     defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<VOP_V2I16_F32_F32_F32>>;
     defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_TiedInput_Profile<VOP_V2I16_V2F16_F32>>;
@@ -1059,6 +1070,9 @@ let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in
 
 let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in {
   let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
+    defm V_CVT_SCALEF32_SR_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_sr_bf8_bf16", VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOP_I32_BF16_I32_F32>>;
+    defm V_CVT_SCALEF32_SR_BF8_F16 : VOP3Inst<"v_cvt_scalef32_sr_bf8_f16", VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOP_I32_F16_I32_F32>>;
+    defm V_CVT_SCALEF32_SR_BF8_F32 : VOP3Inst<"v_cvt_scalef32_sr_bf8_f32", VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOP_I32_F32_I32_F32>>;
     defm V_CVT_SCALEF32_F16_BF8 : VOP3Inst<"v_cvt_scalef32_f16_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_TiedInput_Profile<VOP_V2F16_I32_F32>>;
     defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<VOP_V2I16_F32_F32_F32>>;
     defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_TiedInput_Profile<VOP_V2I16_V2F16_F32>>;
@@ -2131,6 +2145,9 @@ defm V_MAXIMUM3_F32 : VOP3_Real_vi <0x2a9>;
 defm V_BITOP3_B16         : VOP3_Real_BITOP3_gfx9<0x233, "v_bitop3_b16">;
 defm V_BITOP3_B32         : VOP3_Real_BITOP3_gfx9<0x234, "v_bitop3_b32">;
 let OtherPredicates = [HasFP8ConversionScaleInsts] in {
+defm V_CVT_SCALEF32_SR_FP8_BF16 : VOP3OpSel_Real_gfx9<0x246>;
+defm V_CVT_SCALEF32_SR_FP8_F16 : VOP3OpSel_Real_gfx9<0x242>;
+defm V_CVT_SCALEF32_SR_FP8_F32 : VOP3OpSel_Real_gfx9<0x237>;
 defm V_CVT_SCALEF32_F16_FP8 : VOP3OpSel_Real_gfx9 <0x24a>;
 defm V_CVT_SCALEF32_F32_FP8 : VOP3OpSel_Real_gfx9 <0x23b>;
 defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x235>;
@@ -2141,6 +2158,9 @@ defm V_CVT_SCALEF32_PK_F16_FP8  : VOP3OpSel_Real_gfx9<0x248>;
 defm V_CVT_SCALEF32_PK_BF16_FP8 : VOP3OpSel_Real_gfx9<0x269>;
 }
 let OtherPredicates = [HasBF8ConversionScaleInsts] in {
+defm V_CVT_SCALEF32_SR_BF8_BF16 : VOP3OpSel_Real_gfx9<0x247>;
+defm V_CVT_SCALEF32_SR_BF8_F16 : VOP3OpSel_Real_gfx9<0x243>;
+defm V_CVT_SCALEF32_SR_BF8_F32 : VOP3OpSel_Real_gfx9<0x238>;
 defm V_CVT_SCALEF32_F16_BF8 : VOP3OpSel_Real_gfx9 <0x24b>;
 defm V_CVT_SCALEF32_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23c>;
 defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x236>;
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index dac7c33eaa6b2e..a69f62991485ee 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -1407,3 +1407,123 @@ v_cvt_sr_bf16_f32 v0, -v1, v2
 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
 // GFX950: v_cvt_sr_bf16_f32 v0, |v1|, v2          ; encoding: [0x00,0x01,0xa7,0xd2,0x01,0x05,0x02,0x00]
 v_cvt_sr_bf16_f32 v0, |v1|, v2
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_bf16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x47,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_bf8_bf16 v0, v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_bf16 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x47,0xd2,0x01,0x05,0x0e,0x24]
+v_cvt_scalef32_sr_bf8_bf16 v0, -v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_bf16 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x47,0xd2,0x01,0x05,0x0e,0x84]
+v_cvt_scalef32_sr_bf8_bf16 v0, v1, v2, -v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_bf16 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x47,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_bf8_bf16 v0, |v1|, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_bf16 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x47,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_bf8_bf16 v0, v1, v2, |v3|
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x43,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_bf8_f16 v0, v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_f16 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x43,0xd2,0x01,0x05,0x0e,0x24]
+v_cvt_scalef32_sr_bf8_f16 v0, -v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_f16 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x43,0xd2,0x01,0x05,0x0e,0x84]
+v_cvt_scalef32_sr_bf8_f16 v0, v1, v2, -v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_f16 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x43,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_bf8_f16 v0, |v1|, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_f16 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x43,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_bf8_f16 v0, v1, v2, |v3|
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x38,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_bf8_f32 v0, v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x38,0xd2,0x01,0x05,0x0e,0x24]
+v_cvt_scalef32_sr_bf8_f32 v0, -v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x38,0xd2,0x01,0x05,0x0e,0x84]
+v_cvt_scalef32_sr_bf8_f32 v0, v1, v2, -v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x38,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_bf8_f32 v0, |v1|, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x38,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_bf8_f32 v0, v1, v2, |v3|
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_bf16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x46,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_fp8_bf16 v0, v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_bf16 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x46,0xd2,0x01,0x05,0x0e,0x24]
+v_cvt_scalef32_sr_fp8_bf16 v0, -v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_bf16 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x46,0xd2,0x01,0x05,0x0e,0x84]
+v_cvt_scalef32_sr_fp8_bf16 v0, v1, v2, -v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_bf16 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x46,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_fp8_bf16 v0, |v1|, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_bf16 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x46,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_fp8_bf16 v0, v1, v2, |v3|
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x42,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_f16 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x42,0xd2,0x01,0x05,0x0e,0x24]
+v_cvt_scalef32_sr_fp8_f16 v0, -v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x42,0xd2,0x01,0x05,0x0e,0x84]
+v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, -v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_f16 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x42,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_fp8_f16 v0, |v1|, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x42,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, |v3|
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x37,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x37,0xd2,0x01,0x05,0x0e,0x24]
+v_cvt_scalef32_sr_fp8_f32 v0, -v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x37,0xd2,0x01,0x05,0x0e,0x84]
+v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, -v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x37,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_fp8_f32 v0, |v1|, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x37,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, |v3|
diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s
index 55cd57a1bc398e..bc01ea90ebbf49 100644
--- a/llvm/test/MC/AMDGPU/gfx950_err.s
+++ b/llvm/test/MC/AMDGPU/gfx950_err.s
@@ -398,3 +398,21 @@ v_cvt_sr_f16_f32 v1, v2, v3 clamp
 
 // GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 v_cvt_sr_bf16_f32 v1, v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_sr_bf8_bf16 v0, v1, v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_sr_bf8_f16 v0, v1, v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_sr_bf8_f32 v0, v1, v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_sr_fp8_bf16 v0, v1, v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, v3 clamp
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
index ee9a7c5d2006fd..87ead3a927c8fb 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
@@ -1038,3 +1038,93 @@
 
 # GFX950: v_cvt_sr_bf16_f32 v0, |v1|, v2          ; encoding: [0x00,0x01,0xa7,0xd2,0x01,0x05,0x02,0x00]
 0x00,0x01,0xa7,0xd2,0x01,0x05,0x02,0x00
+
+# GFX950: v_cvt_scalef32_sr_bf8_bf16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x47,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x00,0x47,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_bf8_bf16 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x47,0xd2,0x01,0x05,0x0e,0x24]
+0x00,0x00,0x47,0xd2,0x01,0x05,0x0e,0x24
+
+# GFX950: v_cvt_scalef32_sr_bf8_bf16 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x47,0xd2,0x01,0x05,0x0e,0x84]
+0x00,0x00,0x47,0xd2,0x01,0x05,0x0e,0x84
+
+# GFX950: v_cvt_scalef32_sr_bf8_bf16 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x47,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x01,0x47,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_bf8_bf16 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x47,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x04,0x47,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_bf8_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x43,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x00,0x43,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_bf8_f16 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x43,0xd2,0x01,0x05,0x0e,0x24]
+0x00,0x00,0x43,0xd2,0x01,0x05,0x0e,0x24
+
+# GFX950: v_cvt_scalef32_sr_bf8_f16 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x43,0xd2,0x01,0x05,0x0e,0x84]
+0x00,0x00,0x43,0xd2,0x01,0x05,0x0e,0x84
+
+# GFX950: v_cvt_scalef32_sr_bf8_f16 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x43,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x01,0x43,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_bf8_f16 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x43,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x04,0x43,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_bf8_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x38,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x00,0x38,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_bf8_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x38,0xd2,0x01,0x05,0x0e,0x24]
+0x00,0x00,0x38,0xd2,0x01,0x05,0x0e,0x24
+
+# GFX950: v_cvt_scalef32_sr_bf8_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x38,0xd2,0x01,0x05,0x0e,0x84]
+0x00,0x00,0x38,0xd2,0x01,0x05,0x0e,0x84
+
+# GFX950: v_cvt_scalef32_sr_bf8_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x38,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x01,0x38,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_bf8_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x38,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x04,0x38,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_fp8_bf16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x46,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x00,0x46,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_fp8_bf16 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x46,0xd2,0x01,0x05,0x0e,0x24]
+0x00,0x00,0x46,0xd2,0x01,0x05,0x0e,0x24
+
+# GFX950: v_cvt_scalef32_sr_fp8_bf16 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x46,0xd2,0x01,0x05,0x0e,0x84]
+0x00,0x00,0x46,0xd2,0x01,0x05,0x0e,0x84
+
+# GFX950: v_cvt_scalef32_sr_fp8_bf16 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x46,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x01,0x46,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_fp8_bf16 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x46,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x04,0x46,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x42,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x00,0x42,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_fp8_f16 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x42,0xd2,0x01,0x05,0x0e,0x24]
+0x00,0x00,0x42,0xd2,0x01,0x05,0x0e,0x24
+
+# GFX950: v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x42,0xd2,0x01,0x05,0x0e,0x84]
+0x00,0x00,0x42,0xd2,0x01,0x05,0x0e,0x84
+
+# GFX950: v_cvt_scalef32_sr_fp8_f16 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x42,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x01,0x42,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x42,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x04,0x42,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x37,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x00,0x37,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_fp8_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x37,0xd2,0x01,0x05,0x0e,0x24]
+0x00,0x00,0x37,0xd2,0x01,0x05,0x0e,0x24
+
+# GFX950: v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x37,0xd2,0x01,0x05,0x0e,0x84]
+0x00,0x00,0x37,0xd2,0x01,0x05,0x0e,0x84
+
+# GFX950: v_cvt_scalef32_sr_fp8_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x37,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x01,0x37,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x37,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x04,0x37,0xd2,0x01,0x05,0x0e,0x04

arsenm · 2024-11-27T00:08:46Z

Merge activity

Nov 26, 7:08 PM EST: A user started a stack merge that includes this pull request via Graphite.
Nov 26, 7:49 PM EST: Graphite rebased this pull request as part of a merge.
Nov 26, 7:52 PM EST: A user merged this pull request with Graphite.

Co-authored-by: Shilei Tian <[email protected]>

arsenm mentioned this pull request Nov 26, 2024

AMDGPU: Builtin & CodeGen support for v_cvt_scalef32_sr_pk_fp4 instructions #117798

Merged

arsenm added the backend:AMDGPU label Nov 26, 2024 — with Graphite App

arsenm requested review from jayfoad, pravinjagtap, rampitec, shiltian, Sisyph and srpande November 26, 2024 21:34

arsenm marked this pull request as ready for review November 26, 2024 21:34

llvmbot added the mc Machine (object) code label Nov 26, 2024

shiltian approved these changes Nov 26, 2024

View reviewed changes

arsenm force-pushed the users/arsenm/gfx950/mc-v_cvt_sr_f16_bf16_f32 branch 2 times, most recently from 9fddf8c to d286f79 Compare November 27, 2024 00:45

Base automatically changed from users/arsenm/gfx950/mc-v_cvt_sr_f16_bf16_f32 to main November 27, 2024 00:48

AMDGPU: MC support for v_cvt_scalef32_sr_{bf8|fp8}_{f16|bf16|f32}

907bf9e

Co-authored-by: Shilei Tian <[email protected]>

arsenm force-pushed the users/arsenm/gfx950/mc-v_cvt_scalef32_s_bf8_fp8_f16_bf16_f32 branch from 9c5b5a8 to 907bf9e Compare November 27, 2024 00:49

arsenm merged commit f87cabe into main Nov 27, 2024
5 of 6 checks passed

arsenm deleted the users/arsenm/gfx950/mc-v_cvt_scalef32_s_bf8_fp8_f16_bf16_f32 branch November 27, 2024 00:52

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

AMDGPU: MC support for v_cvt_scalef32_sr_{bf8|fp8}_{f16|bf16|f32} #117797

AMDGPU: MC support for v_cvt_scalef32_sr_{bf8|fp8}_{f16|bf16|f32} #117797

arsenm commented Nov 26, 2024

llvmbot commented Nov 26, 2024 •

edited

Loading

arsenm commented Nov 27, 2024 •

edited

Loading

AMDGPU: MC support for v_cvt_scalef32_sr_{bf8|fp8}_{f16|bf16|f32} #117797

AMDGPU: MC support for v_cvt_scalef32_sr_{bf8|fp8}_{f16|bf16|f32} #117797

Conversation

arsenm commented Nov 26, 2024

llvmbot commented Nov 26, 2024 • edited Loading

arsenm commented Nov 27, 2024 • edited Loading

Merge activity

llvmbot commented Nov 26, 2024 •

edited

Loading

arsenm commented Nov 27, 2024 •

edited

Loading