Skip to content

Commit 6a13bbf

Browse files
authored
[AMDGPU][MC] Enables sgpr or imm src1 for float VOP3 DPP, but excluding VOPC. (#87382)
Fixes support on GFX1150 and GFX12 where src1 of e64_dpp instructions should allow sgpr and imm operands. PR #67461 added support for this with int operands, but it was missing a piece for float. Changing VOPC e64_dpp will be in a different patch because there is a bug preventing that change.
1 parent 1aedf94 commit 6a13bbf

19 files changed

+2055
-19
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4627,10 +4627,15 @@ bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
46274627
if (Src1Idx >= 0) {
46284628
const MCOperand &Src1 = Inst.getOperand(Src1Idx);
46294629
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4630-
if (Src1.isImm() ||
4631-
(Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI))) {
4632-
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]);
4633-
Error(Op.getStartLoc(), "invalid operand for instruction");
4630+
if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4631+
auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
4632+
SMLoc S = getRegLoc(Reg, Operands);
4633+
Error(S, "invalid operand for instruction");
4634+
return false;
4635+
}
4636+
if (Src1.isImm()) {
4637+
Error(getInstLoc(Operands),
4638+
"src1 immediate operand invalid for instruction");
46344639
return false;
46354640
}
46364641
}

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2268,7 +2268,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
22682268
field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret;
22692269
field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret;
22702270
field Operand Src0ModVOP3DPP = getSrcModDPP<Src0VT>.ret;
2271-
field Operand Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
2271+
field Operand Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT>.ret;
22722272
field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT>.ret;
22732273
field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
22742274
field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,7 @@ def VOP_MAC_F16_t16 : VOP_MAC <f16> {
503503
dpp8:$dpp8, Dpp8FI:$fi);
504504
let Src2Mod = FP32InputMods; // dummy unused modifiers
505505
let Src2RC64 = VGPRSrc_32; // stub argument
506+
let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
506507
}
507508
def VOP_MAC_F32 : VOP_MAC <f32>;
508509
let HasExtDPP = 0, HasExt32BitDPP = 0 in
@@ -618,7 +619,7 @@ class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
618619
let AsmVOP3Base = "$vdst, $src0_modifiers, $src1_modifiers, $src2";
619620

620621
let Outs32 = (outs DstRC:$vdst);
621-
let Outs64 = (outs DstRC:$vdst);
622+
let Outs64 = (outs DstRC64:$vdst);
622623

623624
// Suppress src2 implied by type since the 32-bit encoding uses an
624625
// implicit VCC use.
@@ -652,7 +653,7 @@ class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
652653
dpp8:$dpp8, Dpp8FI:$fi);
653654

654655
let Src0ModVOP3DPP = FPVRegInputMods;
655-
let Src1ModVOP3DPP = FPVRegInputMods;
656+
let Src1ModVOP3DPP = FP32VCSrcInputMods;
656657

657658
let HasExt = 1;
658659
let HasExtDPP = 1;
@@ -662,7 +663,17 @@ class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
662663
}
663664

664665
def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>;
665-
def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>;
666+
def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
667+
let IsTrue16 = 1;
668+
let DstRC64 = getVALUDstForVT<DstVT>.ret;
669+
670+
let Src0Mod = getSrcMod<f16>.ret;
671+
let Src1Mod = getSrcMod<f16>.ret;
672+
673+
let Src0VOP3DPP = VGPRSrc_32;
674+
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT>.ret;
675+
let Src1ModVOP3DPP = getSrcModVOP3DPP<f16, 1/*IsFake16*/>.ret;
676+
}
666677

667678
def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> {
668679
let Outs32 = (outs SReg_32:$vdst);
@@ -703,7 +714,7 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
703714
//===----------------------------------------------------------------------===//
704715

705716
let SubtargetPredicate = isGFX11Plus in
706-
defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1>;
717+
defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1_fake16>;
707718
defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">;
708719
let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in
709720
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;

llvm/lib/Target/AMDGPU/VOPCInstructions.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ class VOPC_Profile<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt
7575
let HasDst32 = 0;
7676
// VOPC disallows dst_sel and dst_unused as they have no effect on destination
7777
let EmitDstSel = 0;
78+
// FIXME: work around AsmParser bug
79+
let Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
7880
let Outs64 = (outs VOPDstS64orS32:$sdst);
7981
let OutsVOP3DPP = Outs64;
8082
let OutsVOP3DPP8 = Outs64;
@@ -112,6 +114,8 @@ class VOPC_NoSdst_Profile<list<SchedReadWrite> sched, ValueType vt0,
112114
"$src0, $src1");
113115
let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
114116
let EmitDst = 0;
117+
// FIXME: work around AsmParser bug
118+
let Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
115119
}
116120

117121
multiclass VOPC_NoSdst_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt0> {
@@ -785,6 +789,8 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType
785789
let HasSrc1Mods = 0;
786790
let HasClamp = 0;
787791
let HasOMod = 0;
792+
// FIXME: work around AsmParser bug
793+
let Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
788794
}
789795

790796
multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
@@ -812,6 +818,8 @@ class VOPC_Class_NoSdst_Profile<list<SchedReadWrite> sched, ValueType src0VT, Va
812818
let AsmVOP3Base = "$src0_modifiers, $src1";
813819
let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
814820
let EmitDst = 0;
821+
// FIXME: work around AsmParser bug
822+
let Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
815823
}
816824

817825
multiclass VOPC_Class_NoSdst_Profile_t16<list<SchedReadWrite> sched> {

llvm/test/MC/AMDGPU/gfx1150_asm_features.s

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,13 @@ v_add3_u32_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0]
2323

2424
v_cmp_ne_i32_e64_dpp vcc_lo, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
2525
// GFX1150: encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
26+
27+
v_add_f32_e64_dpp v5, v1, s2 row_mirror
28+
// GFX1150: encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff]
29+
30+
v_min3_f16 v5, v1, s2, 2.0 op_sel:[1,1,0,1] quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf
31+
// GFX1150: encoding: [0x05,0x58,0x49,0xd6,0xfa,0x04,0xd0,0x03,0x01,0x55,0x00,0xff]
32+
33+
// This is a regression test for potential changes in the future.
34+
v_cmp_le_f32 vcc_lo, v1, v2 row_mirror
35+
// GFX1150: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff]

llvm/test/MC/AMDGPU/gfx11_asm_err.s

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,13 @@ v_add3_u32_e64_dpp v5, v1, s1, v0 dpp8:[7,6,5,4,3,2,1,0]
5151
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
5252

5353
v_add3_u32_e64_dpp v5, v1, 42, v0 dpp8:[7,6,5,4,3,2,1,0]
54-
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
54+
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: src1 immediate operand invalid for instruction
5555

5656
v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
5757
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
5858

5959
v_add3_u32_e64_dpp v5, v1, 42, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
60-
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
60+
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: src1 immediate operand invalid for instruction
6161

6262
v_cvt_f32_i32_e64_dpp v5, s1 dpp8:[7,6,5,4,3,2,1,0]
6363
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
@@ -135,7 +135,7 @@ v_fmac_f16_e64_dpp v5, s2, v3 quad_perm:[3,2,1,0]
135135
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
136136

137137
v_fmac_f16_e64_dpp v5, v2, 1.0 quad_perm:[3,2,1,0]
138-
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
138+
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: src1 immediate operand invalid for instruction
139139

140140
v_fmac_f32_e64_dpp v5, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
141141
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
@@ -144,7 +144,7 @@ v_fmac_f32_e64_dpp v5, 0x1234, v3 dpp8:[7,6,5,4,3,2,1,0]
144144
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
145145

146146
v_fmac_f32_e64_dpp v5, v2, 1 dpp8:[7,6,5,4,3,2,1,0]
147-
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
147+
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: src1 immediate operand invalid for instruction
148148

149149
v_fmac_f32_e64_dpp v5, -1.0, v3 quad_perm:[3,2,1,0]
150150
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

llvm/test/MC/AMDGPU/gfx12_asm_features.s

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ v_add3_u32_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0]
2323
v_cmp_ne_i32_e64_dpp vcc_lo, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
2424
// GFX1150: encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
2525

26+
// This is a regression test for potential changes in the future.
27+
v_cmp_le_f32 vcc_lo, v1, v2 row_mirror
28+
// GFX1150: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff]
29+
2630
//
2731
// Elements of CPol operand can be given in any order
2832
//

0 commit comments

Comments
 (0)