-
Notifications
You must be signed in to change notification settings - Fork 12.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: MC support for v_cvt_scalef32_sr_{bf8|fp8}_{f16|bf16|f32} #117797
Merged
arsenm
merged 1 commit into
main
from
users/arsenm/gfx950/mc-v_cvt_scalef32_s_bf8_fp8_f16_bf16_f32
Nov 27, 2024
Merged
AMDGPU: MC support for v_cvt_scalef32_sr_{bf8|fp8}_{f16|bf16|f32} #117797
arsenm
merged 1 commit into
main
from
users/arsenm/gfx950/mc-v_cvt_scalef32_s_bf8_fp8_f16_bf16_f32
Nov 27, 2024
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This was referenced Nov 26, 2024
Merged
arsenm
requested review from
jayfoad,
pravinjagtap,
rampitec,
shiltian,
Sisyph and
srpande
November 26, 2024 21:34
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes: Co-authored-by: Shilei Tian <[email protected]> Full diff: https://github.com/llvm/llvm-project/pull/117797.diff 5 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 0938a11077cfb1..119a4d63704777 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2852,6 +2852,9 @@ def VOP_I32_V2F16_F32_F32 : VOPProfile<[i32, v2f16, f32, f32]>;
def VOP_I32_V2BF16_F32_F32: VOPProfile<[i32, v2bf16, f32, f32]>;
def VOP_BF16_F32_I32 : VOPProfile<[bf16, f32, i32, untyped]>;
def VOP_F16_F32_I32 : VOPProfile<[f16, f32, i32, untyped]>;
+def VOP_I32_BF16_I32_F32 : VOPProfile<[i32, bf16, i32, f32]>;
+def VOP_I32_F16_I32_F32 : VOPProfile<[i32, f16, i32, f32]>;
+def VOP_I32_F32_I32_F32 : VOPProfile<[i32, f32, i32, f32]>;
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 2c441910fe21a8..9882f1f2a16780 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -967,6 +967,14 @@ class VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<VOPProfile P> : VOP3_Profil
let HasOMod = 0;
}
+class VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOPProfile P> : VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<P> {
+ let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
+ Int32InputMods:$src1_modifiers, Src1RC64:$src1,
+ FP32InputMods:$src2_modifiers, Src2RC64:$src2,
+ VGPR_32:$vdst_in, op_sel0:$op_sel);
+}
+
+
class VOP3_CVT_SCALE_FP4_F16BF16_TiedInput_Profile<VOPProfile P> : VOP3_Profile<P, VOP3_OPSEL> {
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
FP32InputMods:$src1_modifiers, Src1RC64:$src1,
@@ -1046,6 +1054,9 @@ class VOP3_CVT_SCALEF32_PK_F864_Profile<VOPProfile P> : VOP3_Profile<P> {
let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
+ defm V_CVT_SCALEF32_SR_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_sr_fp8_bf16", VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOP_I32_BF16_I32_F32>>;
+ defm V_CVT_SCALEF32_SR_FP8_F16 : VOP3Inst<"v_cvt_scalef32_sr_fp8_f16", VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOP_I32_F16_I32_F32>>;
+ defm V_CVT_SCALEF32_SR_FP8_F32 : VOP3Inst<"v_cvt_scalef32_sr_fp8_f32", VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOP_I32_F32_I32_F32>>;
defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_TiedInput_Profile<VOP_V2F16_I32_F32>>;
defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<VOP_V2I16_F32_F32_F32>>;
defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_TiedInput_Profile<VOP_V2I16_V2F16_F32>>;
@@ -1059,6 +1070,9 @@ let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in
let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in {
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
+ defm V_CVT_SCALEF32_SR_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_sr_bf8_bf16", VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOP_I32_BF16_I32_F32>>;
+ defm V_CVT_SCALEF32_SR_BF8_F16 : VOP3Inst<"v_cvt_scalef32_sr_bf8_f16", VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOP_I32_F16_I32_F32>>;
+ defm V_CVT_SCALEF32_SR_BF8_F32 : VOP3Inst<"v_cvt_scalef32_sr_bf8_f32", VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOP_I32_F32_I32_F32>>;
defm V_CVT_SCALEF32_F16_BF8 : VOP3Inst<"v_cvt_scalef32_f16_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_TiedInput_Profile<VOP_V2F16_I32_F32>>;
defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<VOP_V2I16_F32_F32_F32>>;
defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_TiedInput_Profile<VOP_V2I16_V2F16_F32>>;
@@ -2131,6 +2145,9 @@ defm V_MAXIMUM3_F32 : VOP3_Real_vi <0x2a9>;
defm V_BITOP3_B16 : VOP3_Real_BITOP3_gfx9<0x233, "v_bitop3_b16">;
defm V_BITOP3_B32 : VOP3_Real_BITOP3_gfx9<0x234, "v_bitop3_b32">;
let OtherPredicates = [HasFP8ConversionScaleInsts] in {
+defm V_CVT_SCALEF32_SR_FP8_BF16 : VOP3OpSel_Real_gfx9<0x246>;
+defm V_CVT_SCALEF32_SR_FP8_F16 : VOP3OpSel_Real_gfx9<0x242>;
+defm V_CVT_SCALEF32_SR_FP8_F32 : VOP3OpSel_Real_gfx9<0x237>;
defm V_CVT_SCALEF32_F16_FP8 : VOP3OpSel_Real_gfx9 <0x24a>;
defm V_CVT_SCALEF32_F32_FP8 : VOP3OpSel_Real_gfx9 <0x23b>;
defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x235>;
@@ -2141,6 +2158,9 @@ defm V_CVT_SCALEF32_PK_F16_FP8 : VOP3OpSel_Real_gfx9<0x248>;
defm V_CVT_SCALEF32_PK_BF16_FP8 : VOP3OpSel_Real_gfx9<0x269>;
}
let OtherPredicates = [HasBF8ConversionScaleInsts] in {
+defm V_CVT_SCALEF32_SR_BF8_BF16 : VOP3OpSel_Real_gfx9<0x247>;
+defm V_CVT_SCALEF32_SR_BF8_F16 : VOP3OpSel_Real_gfx9<0x243>;
+defm V_CVT_SCALEF32_SR_BF8_F32 : VOP3OpSel_Real_gfx9<0x238>;
defm V_CVT_SCALEF32_F16_BF8 : VOP3OpSel_Real_gfx9 <0x24b>;
defm V_CVT_SCALEF32_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23c>;
defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x236>;
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index dac7c33eaa6b2e..a69f62991485ee 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -1407,3 +1407,123 @@ v_cvt_sr_bf16_f32 v0, -v1, v2
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_sr_bf16_f32 v0, |v1|, v2 ; encoding: [0x00,0x01,0xa7,0xd2,0x01,0x05,0x02,0x00]
v_cvt_sr_bf16_f32 v0, |v1|, v2
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_bf16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x47,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_bf8_bf16 v0, v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_bf16 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x47,0xd2,0x01,0x05,0x0e,0x24]
+v_cvt_scalef32_sr_bf8_bf16 v0, -v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_bf16 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x47,0xd2,0x01,0x05,0x0e,0x84]
+v_cvt_scalef32_sr_bf8_bf16 v0, v1, v2, -v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_bf16 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x47,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_bf8_bf16 v0, |v1|, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_bf16 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x47,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_bf8_bf16 v0, v1, v2, |v3|
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x43,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_bf8_f16 v0, v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_f16 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x43,0xd2,0x01,0x05,0x0e,0x24]
+v_cvt_scalef32_sr_bf8_f16 v0, -v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_f16 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x43,0xd2,0x01,0x05,0x0e,0x84]
+v_cvt_scalef32_sr_bf8_f16 v0, v1, v2, -v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_f16 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x43,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_bf8_f16 v0, |v1|, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_f16 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x43,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_bf8_f16 v0, v1, v2, |v3|
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x38,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_bf8_f32 v0, v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x38,0xd2,0x01,0x05,0x0e,0x24]
+v_cvt_scalef32_sr_bf8_f32 v0, -v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x38,0xd2,0x01,0x05,0x0e,0x84]
+v_cvt_scalef32_sr_bf8_f32 v0, v1, v2, -v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x38,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_bf8_f32 v0, |v1|, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_bf8_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x38,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_bf8_f32 v0, v1, v2, |v3|
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_bf16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x46,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_fp8_bf16 v0, v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_bf16 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x46,0xd2,0x01,0x05,0x0e,0x24]
+v_cvt_scalef32_sr_fp8_bf16 v0, -v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_bf16 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x46,0xd2,0x01,0x05,0x0e,0x84]
+v_cvt_scalef32_sr_fp8_bf16 v0, v1, v2, -v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_bf16 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x46,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_fp8_bf16 v0, |v1|, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_bf16 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x46,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_fp8_bf16 v0, v1, v2, |v3|
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x42,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_f16 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x42,0xd2,0x01,0x05,0x0e,0x24]
+v_cvt_scalef32_sr_fp8_f16 v0, -v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x42,0xd2,0x01,0x05,0x0e,0x84]
+v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, -v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_f16 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x42,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_fp8_f16 v0, |v1|, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x42,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, |v3|
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x37,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x37,0xd2,0x01,0x05,0x0e,0x24]
+v_cvt_scalef32_sr_fp8_f32 v0, -v1, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x37,0xd2,0x01,0x05,0x0e,0x84]
+v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, -v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x37,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_fp8_f32 v0, |v1|, v2, v3
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x37,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, |v3|
diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s
index 55cd57a1bc398e..bc01ea90ebbf49 100644
--- a/llvm/test/MC/AMDGPU/gfx950_err.s
+++ b/llvm/test/MC/AMDGPU/gfx950_err.s
@@ -398,3 +398,21 @@ v_cvt_sr_f16_f32 v1, v2, v3 clamp
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_sr_bf16_f32 v1, v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_sr_bf8_bf16 v0, v1, v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_sr_bf8_f16 v0, v1, v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_sr_bf8_f32 v0, v1, v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_sr_fp8_bf16 v0, v1, v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, v3 clamp
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
index ee9a7c5d2006fd..87ead3a927c8fb 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
@@ -1038,3 +1038,93 @@
# GFX950: v_cvt_sr_bf16_f32 v0, |v1|, v2 ; encoding: [0x00,0x01,0xa7,0xd2,0x01,0x05,0x02,0x00]
0x00,0x01,0xa7,0xd2,0x01,0x05,0x02,0x00
+
+# GFX950: v_cvt_scalef32_sr_bf8_bf16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x47,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x00,0x47,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_bf8_bf16 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x47,0xd2,0x01,0x05,0x0e,0x24]
+0x00,0x00,0x47,0xd2,0x01,0x05,0x0e,0x24
+
+# GFX950: v_cvt_scalef32_sr_bf8_bf16 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x47,0xd2,0x01,0x05,0x0e,0x84]
+0x00,0x00,0x47,0xd2,0x01,0x05,0x0e,0x84
+
+# GFX950: v_cvt_scalef32_sr_bf8_bf16 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x47,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x01,0x47,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_bf8_bf16 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x47,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x04,0x47,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_bf8_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x43,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x00,0x43,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_bf8_f16 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x43,0xd2,0x01,0x05,0x0e,0x24]
+0x00,0x00,0x43,0xd2,0x01,0x05,0x0e,0x24
+
+# GFX950: v_cvt_scalef32_sr_bf8_f16 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x43,0xd2,0x01,0x05,0x0e,0x84]
+0x00,0x00,0x43,0xd2,0x01,0x05,0x0e,0x84
+
+# GFX950: v_cvt_scalef32_sr_bf8_f16 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x43,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x01,0x43,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_bf8_f16 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x43,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x04,0x43,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_bf8_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x38,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x00,0x38,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_bf8_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x38,0xd2,0x01,0x05,0x0e,0x24]
+0x00,0x00,0x38,0xd2,0x01,0x05,0x0e,0x24
+
+# GFX950: v_cvt_scalef32_sr_bf8_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x38,0xd2,0x01,0x05,0x0e,0x84]
+0x00,0x00,0x38,0xd2,0x01,0x05,0x0e,0x84
+
+# GFX950: v_cvt_scalef32_sr_bf8_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x38,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x01,0x38,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_bf8_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x38,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x04,0x38,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_fp8_bf16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x46,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x00,0x46,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_fp8_bf16 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x46,0xd2,0x01,0x05,0x0e,0x24]
+0x00,0x00,0x46,0xd2,0x01,0x05,0x0e,0x24
+
+# GFX950: v_cvt_scalef32_sr_fp8_bf16 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x46,0xd2,0x01,0x05,0x0e,0x84]
+0x00,0x00,0x46,0xd2,0x01,0x05,0x0e,0x84
+
+# GFX950: v_cvt_scalef32_sr_fp8_bf16 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x46,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x01,0x46,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_fp8_bf16 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x46,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x04,0x46,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x42,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x00,0x42,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_fp8_f16 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x42,0xd2,0x01,0x05,0x0e,0x24]
+0x00,0x00,0x42,0xd2,0x01,0x05,0x0e,0x24
+
+# GFX950: v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x42,0xd2,0x01,0x05,0x0e,0x84]
+0x00,0x00,0x42,0xd2,0x01,0x05,0x0e,0x84
+
+# GFX950: v_cvt_scalef32_sr_fp8_f16 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x42,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x01,0x42,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x42,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x04,0x42,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x37,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x00,0x37,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_fp8_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x37,0xd2,0x01,0x05,0x0e,0x24]
+0x00,0x00,0x37,0xd2,0x01,0x05,0x0e,0x24
+
+# GFX950: v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x37,0xd2,0x01,0x05,0x0e,0x84]
+0x00,0x00,0x37,0xd2,0x01,0x05,0x0e,0x84
+
+# GFX950: v_cvt_scalef32_sr_fp8_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0x37,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x01,0x37,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x37,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x04,0x37,0xd2,0x01,0x05,0x0e,0x04
|
shiltian
approved these changes
Nov 26, 2024
arsenm
force-pushed
the
users/arsenm/gfx950/mc-v_cvt_sr_f16_bf16_f32
branch
2 times, most recently
from
November 27, 2024 00:45
9fddf8c
to
d286f79
Compare
Base automatically changed from
users/arsenm/gfx950/mc-v_cvt_sr_f16_bf16_f32
to
main
November 27, 2024 00:48
Co-authored-by: Shilei Tian <[email protected]>
arsenm
force-pushed
the
users/arsenm/gfx950/mc-v_cvt_scalef32_s_bf8_fp8_f16_bf16_f32
branch
from
November 27, 2024 00:49
9c5b5a8
to
907bf9e
Compare
arsenm
deleted the
users/arsenm/gfx950/mc-v_cvt_scalef32_s_bf8_fp8_f16_bf16_f32
branch
November 27, 2024 00:52
This was referenced Nov 27, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Co-authored-by: Shilei Tian <[email protected]>