Skip to content

Commit 88e0b25

Browse files
committed
AMDGPU/SI: Add 64-bit versions of v_nop and v_clrexcp
Summary: The assembly printing of these is still missing the encoding size suffix, but this will be fixed in a later commit.
Reviewers: arsenm
Subscribers: arsenm, llvm-commits
Differential Revision: http://reviews.llvm.org/D13436
llvm-svn: 249424
1 parent fb33824 commit 88e0b25

File tree

8 files changed

+106
-25
lines changed

8 files changed

+106
-25
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,11 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
347347
bool ParseSectionDirectiveHSAText();
348348

349349
public:
350+
public:
351+
enum AMDGPUMatchResultTy {
352+
Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
353+
};
354+
350355
AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser,
351356
const MCInstrInfo &MII,
352357
const MCTargetOptions &Options)
@@ -556,6 +561,11 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
556561
(getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)))
557562
return Match_InvalidOperand;
558563

564+
if ((TSFlags & SIInstrFlags::VOP3) &&
565+
(TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
566+
getForcedEncodingSize() != 64)
567+
return Match_PreferE32;
568+
559569
return Match_Success;
560570
}
561571

@@ -614,6 +624,9 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
614624
}
615625
return Error(ErrorLoc, "invalid operand for instruction");
616626
}
627+
case Match_PreferE32:
628+
return Error(IDLoc, "internal error: instruction without _e64 suffix "
629+
"should be encoded as e32");
617630
}
618631
llvm_unreachable("Implement any new match types added!");
619632
}
@@ -1701,8 +1714,12 @@ AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) {
17011714
}
17021715

17031716
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
1704-
((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
1705-
unsigned i = 2;
1717+
1718+
unsigned i = 1;
1719+
const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
1720+
if (Desc.getNumDefs() > 0) {
1721+
((AMDGPUOperand &)*Operands[i++]).addRegOperands(Inst, 1);
1722+
}
17061723

17071724
std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
17081725

llvm/lib/Target/AMDGPU/SIDefines.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ enum {
3737
MIMG = 1 << 18,
3838
FLAT = 1 << 19,
3939
WQM = 1 << 20,
40-
VGPRSpill = 1 << 21
40+
VGPRSpill = 1 << 21,
41+
VOPAsmPrefer32Bit = 1 << 22
4142
};
4243
}
4344

llvm/lib/Target/AMDGPU/SIInstrFormats.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
4141
field bits<1> WQM = 0;
4242
field bits<1> VGPRSpill = 0;
4343

44+
// This bit tells the assembler to use the 32-bit encoding in case it
45+
// is unable to infer the encoding from the operands.
46+
field bits<1> VOPAsmPrefer32Bit = 0;
47+
4448
// These need to be kept in sync with the enum in SIInstrFlags.
4549
let TSFlags{0} = VM_CNT;
4650
let TSFlags{1} = EXP_CNT;
@@ -68,6 +72,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
6872
let TSFlags{19} = FLAT;
6973
let TSFlags{20} = WQM;
7074
let TSFlags{21} = VGPRSpill;
75+
let TSFlags{22} = VOPAsmPrefer32Bit;
7176

7277
let SchedRW = [Write32Bit];
7378
}

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -989,11 +989,12 @@ def InputModsNoDefault : Operand <i32> {
989989
let ParserMatchClass = InputModsMatchClass;
990990
}
991991

992-
class getNumSrcArgs<ValueType Src1, ValueType Src2> {
992+
class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
993993
int ret =
994-
!if (!eq(Src1.Value, untyped.Value), 1, // VOP1
994+
!if (!eq(Src0.Value, untyped.Value), 0,
995+
!if (!eq(Src1.Value, untyped.Value), 1, // VOP1
995996
!if (!eq(Src2.Value, untyped.Value), 2, // VOP2
996-
3)); // VOP3
997+
3))); // VOP3
997998
}
998999

9991000
// Returns the register class to use for the destination of VOP[123C]
@@ -1085,25 +1086,28 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
10851086
// Returns the assembly string for the inputs and outputs of a VOP[12C]
10861087
// instruction. This does not add the _e32 suffix, so it can be reused
10871088
// by getAsm64.
1088-
class getAsm32 <int NumSrcArgs> {
1089+
class getAsm32 <bit HasDst, int NumSrcArgs> {
1090+
string dst = "$dst";
1091+
string src0 = ", $src0";
10891092
string src1 = ", $src1";
10901093
string src2 = ", $src2";
1091-
string ret = "$dst, $src0"#
1092-
!if(!eq(NumSrcArgs, 1), "", src1)#
1093-
!if(!eq(NumSrcArgs, 3), src2, "");
1094+
string ret = !if(HasDst, dst, "") #
1095+
!if(!eq(NumSrcArgs, 1), src0, "") #
1096+
!if(!eq(NumSrcArgs, 2), src0#src1, "") #
1097+
!if(!eq(NumSrcArgs, 3), src0#src1#src2, "");
10941098
}
10951099

10961100
// Returns the assembly string for the inputs and outputs of a VOP3
10971101
// instruction.
1098-
class getAsm64 <int NumSrcArgs, bit HasModifiers> {
1102+
class getAsm64 <bit HasDst, int NumSrcArgs, bit HasModifiers> {
10991103
string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
11001104
string src1 = !if(!eq(NumSrcArgs, 1), "",
11011105
!if(!eq(NumSrcArgs, 2), " $src1_modifiers",
11021106
" $src1_modifiers,"));
11031107
string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
11041108
string ret =
11051109
!if(!eq(HasModifiers, 0),
1106-
getAsm32<NumSrcArgs>.ret,
1110+
getAsm32<HasDst, NumSrcArgs>.ret,
11071111
"$dst, "#src0#src1#src2#"$clamp"#"$omod");
11081112
}
11091113

@@ -1122,11 +1126,12 @@ class VOPProfile <list<ValueType> _ArgVT> {
11221126
field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
11231127
field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
11241128

1125-
field bit HasDst32 = !if(!eq(DstVT, untyped), 0, 1);
1126-
field int NumSrcArgs = getNumSrcArgs<Src1VT, Src2VT>.ret;
1129+
field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
1130+
field bit HasDst32 = HasDst;
1131+
field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
11271132
field bit HasModifiers = hasModifiers<Src0VT>.ret;
11281133

1129-
field dag Outs = (outs DstRC:$dst);
1134+
field dag Outs = !if(HasDst,(outs DstRC:$dst),(outs));
11301135

11311136
// VOP3b instructions are a special case with a second explicit
11321137
// output. This is manually overridden for them.
@@ -1137,8 +1142,8 @@ class VOPProfile <list<ValueType> _ArgVT> {
11371142
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
11381143
HasModifiers>.ret;
11391144

1140-
field string Asm32 = getAsm32<NumSrcArgs>.ret;
1141-
field string Asm64 = getAsm64<NumSrcArgs, HasModifiers>.ret;
1145+
field string Asm32 = getAsm32<HasDst, NumSrcArgs>.ret;
1146+
field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers>.ret;
11421147
}
11431148

11441149
// FIXME: I think these F16/I16 profiles will need to use f16/i16 types in order
@@ -1151,6 +1156,8 @@ def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
11511156
def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i32, untyped]>;
11521157
def VOP_I16_I16_I16 : VOPProfile <[i32, i32, i32, untyped]>;
11531158

1159+
def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
1160+
11541161
def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
11551162
def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
11561163
def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
@@ -1246,8 +1253,8 @@ def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
12461253
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
12471254
let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
12481255
HasModifiers>.ret;
1249-
let Asm32 = getAsm32<2>.ret;
1250-
let Asm64 = getAsm64<2, HasModifiers>.ret;
1256+
let Asm32 = getAsm32<1, 2>.ret;
1257+
let Asm64 = getAsm64<1, 2, HasModifiers>.ret;
12511258
}
12521259
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
12531260
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
@@ -1424,6 +1431,9 @@ class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
14241431
MnemonicAlias<opName#"_e64", opName> {
14251432
let isPseudo = 1;
14261433
let isCodeGenOnly = 1;
1434+
1435+
field bit vdst;
1436+
field bit src0;
14271437
}
14281438

14291439
class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1156,8 +1156,8 @@ defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o"
11561156
// VOP1 Instructions
11571157
//===----------------------------------------------------------------------===//
11581158

1159-
let vdst = 0, src0 = 0 in {
1160-
defm V_NOP : VOP1_m <vop1<0x0>, (outs), (ins), "v_nop", [], "v_nop">;
1159+
let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
1160+
defm V_NOP : VOP1Inst <vop1<0x0>, "v_nop", VOP_NONE>;
11611161
}
11621162

11631163
let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
@@ -1332,10 +1332,8 @@ defm V_FREXP_EXP_I32_F32 : VOP1Inst <vop1<0x3f, 0x33>, "v_frexp_exp_i32_f32",
13321332
defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0x40, 0x34>, "v_frexp_mant_f32",
13331333
VOP_F32_F32
13341334
>;
1335-
let vdst = 0, src0 = 0 in {
1336-
defm V_CLREXCP : VOP1_m <vop1<0x41,0x35>, (outs), (ins), "v_clrexcp", [],
1337-
"v_clrexcp"
1338-
>;
1335+
let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
1336+
defm V_CLREXCP : VOP1Inst <vop1<0x41,0x35>, "v_clrexcp", VOP_NONE>;
13391337
}
13401338
defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_I32_I32>;
13411339
defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_I32_I32>;

llvm/test/MC/AMDGPU/vop1.s

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,25 @@
88
// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI
99
// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOVI
1010

11+
// Force 32-bit encoding
12+
13+
// GCN: v_mov_b32_e32 v1, v2 ; encoding: [0x02,0x03,0x02,0x7e]
14+
v_mov_b32_e32 v1, v2
15+
16+
// Force 32-bit encoding for special instructions
17+
// FIXME: We should be printing _e32 suffixes for these:
18+
19+
// GCN: v_nop ; encoding: [0x00,0x00,0x00,0x7e]
20+
v_nop_e32
21+
22+
// SICI: v_clrexcp ; encoding: [0x00,0x82,0x00,0x7e]
23+
// VI: v_clrexcp ; encoding: [0x00,0x6a,0x00,0x7e]
24+
v_clrexcp_e32
25+
26+
//===----------------------------------------------------------------------===//
27+
// Instructions
28+
//===----------------------------------------------------------------------===//
29+
1130

1231
// GCN: v_nop ; encoding: [0x00,0x00,0x00,0x7e]
1332
v_nop

llvm/test/MC/AMDGPU/vop3-vop1-nosrc.s

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s --check-prefix=SICI
2+
// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI
3+
// XFAIL: *
4+
5+
// FIXME: We should be printing _e64 suffixes for these.
6+
// FIXME: When this is fixed delete this file and fix test case in vop3.s
7+
8+
v_nop_e64
9+
// SICI: v_nop_e64 ; encoding: [0x00,0x00,0x00,0xd3,0x00,0x00,0x00,0x00]
10+
// VI: v_nop_e64 ; encoding: [0x00,0x00,0x40,0xd1,0x00,0x00,0x00,0x00]
11+
12+
v_clrexcp_e64
13+
// SICI: v_clrexcp_e64 ; encoding: [0x00,0x00,0x82,0xd3,0x00,0x00,0x00,0x00]
14+
// VI: v_clrexcp_e64 ; encoding: [0x00,0x00,0x75,0xd1,0x00,0x00,0x00,0x00]

llvm/test/MC/AMDGPU/vop3.s

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,23 @@ v_cmp_f_i64 s[2:3], v[4:5], v[6:7]
118118
// VOP1 Instructions
119119
//===----------------------------------------------------------------------===//
120120

121+
// Test forced e64 encoding with e32 operands
122+
123+
v_mov_b32_e64 v1, v2
124+
// SICI: v_mov_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x02,0xd3,0x02,0x01,0x00,0x00]
125+
// VI: v_mov_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x41,0xd1,0x02,0x01,0x00,0x00]
126+
127+
// Force e64 encoding for special instructions.
128+
// FIXME: We should be printing the _e64 suffix for v_nop and v_clrexcp.
129+
130+
v_nop_e64
131+
// SICI: v_nop ; encoding: [0x00,0x00,0x00,0xd3,0x00,0x00,0x00,0x00]
132+
// VI: v_nop ; encoding: [0x00,0x00,0x40,0xd1,0x00,0x00,0x00,0x00]
133+
134+
v_clrexcp_e64
135+
// SICI: v_clrexcp ; encoding: [0x00,0x00,0x82,0xd3,0x00,0x00,0x00,0x00]
136+
// VI: v_clrexcp ; encoding: [0x00,0x00,0x75,0xd1,0x00,0x00,0x00,0x00]
137+
121138
//
122139
// Modifier tests:
123140
//

0 commit comments

Comments
 (0)