@@ -207,33 +207,39 @@ class ValueToRegClass<ValueType T> {
207
207
// Some Common Instruction Class Templates
208
208
//===----------------------------------------------------------------------===//
209
209
210
+ // Utility class that bundles a DAG value type with its register class and
211
+ // immediate operand, so multiclasses can iterate over and be parameterized by it.
212
+ class RegTyInfo<ValueType ty, NVPTXRegClass rc, Operand imm> {
213
+ ValueType Ty = ty; // value type used in the selection patterns
214
+ NVPTXRegClass RC = rc; // register class used for register operands
215
+ Operand Imm = imm; // operand used for immediate operands
216
+ int Size = ty.Size; // copied from the value type; I3 pastes it into the opcode string as the width suffix
217
+ }
218
+
219
+ def I16RT : RegTyInfo<i16, Int16Regs, i16imm>; // i16 values, Int16Regs registers, i16imm immediates
219
+ def I32RT : RegTyInfo<i32, Int32Regs, i32imm>; // i32 values, Int32Regs registers, i32imm immediates
220
+ def I64RT : RegTyInfo<i64, Int64Regs, i64imm>; // i64 values, Int64Regs registers, i64imm immediates
222
+
210
223
// Template for instructions which take three int64, int32, or int16 args.
211
224
// The instructions are named "<OpcStr><Width>" (e.g. "add.s64").
212
- multiclass I3<string OpcStr, SDNode OpNode> {
213
- def i64rr :
214
- NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
215
- !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
216
- [(set i64:$dst, (OpNode i64:$a, i64:$b))]>;
217
- def i64ri :
218
- NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
219
- !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
220
- [(set i64:$dst, (OpNode i64:$a, imm:$b))]>;
221
- def i32rr :
222
- NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
223
- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
224
- [(set i32:$dst, (OpNode i32:$a, i32:$b))]>;
225
- def i32ri :
226
- NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
227
- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
228
- [(set i32:$dst, (OpNode i32:$a, imm:$b))]>;
229
- def i16rr :
230
- NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
231
- !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
232
- [(set i16:$dst, (OpNode i16:$a, i16:$b))]>;
233
- def i16ri :
234
- NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
235
- !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
236
- [(set i16:$dst, (OpNode i16:$a, (imm):$b))]>;
225
+ multiclass I3<string OpcStr, SDNode OpNode, bit commutative> { // two-input integer op, stamped out once per RegTyInfo listed below
226
+ foreach t = [I16RT, I32RT, I64RT] in {
227
+ defvar asmstr = OpcStr # t.Size # " \t$dst, $a, $b;"; // OpcStr followed by the width, e.g. "add.s64"
228
+
229
+ def t.Ty # rr : // register, register
230
+ NVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b),
231
+ asmstr,
232
+ [(set t.Ty:$dst, (OpNode t.Ty:$a, t.Ty:$b))]>;
233
+ def t.Ty # ri : // register, immediate
234
+ NVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.Imm:$b),
235
+ asmstr,
236
+ [(set t.Ty:$dst, (OpNode t.Ty:$a, imm:$b))]>; // register operand typed by t.Ty, matching the rr pattern
237
+ if !not(commutative) then // the immediate-first form is only emitted for non-commutative operators
238
+ def t.Ty # ir : // immediate, register
239
+ NVPTXInst<(outs t.RC:$dst), (ins t.Imm:$a, t.RC:$b),
240
+ asmstr,
241
+ [(set t.Ty:$dst, (OpNode imm:$a, t.Ty:$b))]>; // register operand typed by t.Ty, matching the rr pattern
242
+ }
237
243
}
238
244
239
245
class I16x2<string OpcStr, SDNode OpNode> :
@@ -870,8 +876,8 @@ defm SUB_i1 : ADD_SUB_i1<sub>;
870
876
871
877
// int16, int32, and int64 signed addition. Since nvptx is 2's complement, we
872
878
// also use these for unsigned arithmetic.
873
- defm ADD : I3<"add.s", add>;
874
- defm SUB : I3<"sub.s", sub>;
879
+ defm ADD : I3<"add.s", add, /*commutative=*/ true >; // commutative: I3 emits only the rr and ri forms
880
+ defm SUB : I3<"sub.s", sub, /*commutative=*/ false >; // non-commutative: I3 also emits the ir (immediate-first) form
875
881
876
882
def ADD16x2 : I16x2<"add.s", add>;
877
883
@@ -883,18 +889,18 @@ defm SUBCC : ADD_SUB_INT_CARRY<"sub.cc", subc>;
883
889
defm ADDCCC : ADD_SUB_INT_CARRY<"addc.cc", adde>;
884
890
defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube>;
885
891
886
- defm MULT : I3<"mul.lo.s", mul>;
892
+ defm MULT : I3<"mul.lo.s", mul, /*commutative=*/ true >;
887
893
888
- defm MULTHS : I3<"mul.hi.s", mulhs>;
889
- defm MULTHU : I3<"mul.hi.u", mulhu>;
894
+ defm MULTHS : I3<"mul.hi.s", mulhs, /*commutative=*/ true >;
895
+ defm MULTHU : I3<"mul.hi.u", mulhu, /*commutative=*/ true >;
890
896
891
- defm SDIV : I3<"div.s", sdiv>;
892
- defm UDIV : I3<"div.u", udiv>;
897
+ defm SDIV : I3<"div.s", sdiv, /*commutative=*/ false >; // non-commutative: I3 also emits the ir (immediate-first) form
898
+ defm UDIV : I3<"div.u", udiv, /*commutative=*/ false >; // non-commutative: I3 also emits the ir (immediate-first) form
893
899
894
900
// The ri versions of rem.s and rem.u won't be selected; DAGCombiner::visitSREM
895
901
// will lower it.
896
- defm SREM : I3<"rem.s", srem>;
897
- defm UREM : I3<"rem.u", urem>;
902
+ defm SREM : I3<"rem.s", srem, /*commutative=*/ false >; // non-commutative: I3 also emits the ir (immediate-first) form
903
+ defm UREM : I3<"rem.u", urem, /*commutative=*/ false >; // non-commutative: I3 also emits the ir (immediate-first) form
898
904
899
905
// Integer absolute value. NumBits should be one minus the bit width of RC.
900
906
// This idiom implements the algorithm at
@@ -909,10 +915,10 @@ defm ABS_32 : ABS<i32, Int32Regs, ".s32">;
909
915
defm ABS_64 : ABS<i64, Int64Regs, ".s64">;
910
916
911
917
// Integer min/max.
912
- defm SMAX : I3<"max.s", smax>;
913
- defm UMAX : I3<"max.u", umax>;
914
- defm SMIN : I3<"min.s", smin>;
915
- defm UMIN : I3<"min.u", umin>;
918
+ defm SMAX : I3<"max.s", smax, /*commutative=*/ true >;
919
+ defm UMAX : I3<"max.u", umax, /*commutative=*/ true >;
920
+ defm SMIN : I3<"min.s", smin, /*commutative=*/ true >;
921
+ defm UMIN : I3<"min.u", umin, /*commutative=*/ true >;
916
922
917
923
def SMAX16x2 : I16x2<"max.s", smax>;
918
924
def UMAX16x2 : I16x2<"max.u", umax>;
@@ -1392,25 +1398,32 @@ def FDIV32ri_prec :
1392
1398
//
1393
1399
1394
1400
multiclass FMA<string OpcStr, RegisterClass RC, Operand ImmCls, Predicate Pred> {
1395
- def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c),
1396
- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
1397
- [(set RC:$dst, (fma RC:$a, RC:$b, RC:$c))]>,
1398
- Requires<[Pred]>;
1399
- def rri : NVPTXInst<(outs RC:$dst),
1400
- (ins RC:$a, RC:$b, ImmCls:$c),
1401
- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
1402
- [(set RC:$dst, (fma RC:$a, RC:$b, fpimm:$c))]>,
1403
- Requires<[Pred]>;
1404
- def rir : NVPTXInst<(outs RC:$dst),
1405
- (ins RC:$a, ImmCls:$b, RC:$c),
1406
- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
1407
- [(set RC:$dst, (fma RC:$a, fpimm:$b, RC:$c))]>,
1408
- Requires<[Pred]>;
1409
- def rii : NVPTXInst<(outs RC:$dst),
1410
- (ins RC:$a, ImmCls:$b, ImmCls:$c),
1411
- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
1412
- [(set RC:$dst, (fma RC:$a, fpimm:$b, fpimm:$c))]>,
1413
- Requires<[Pred]>;
1401
+ defvar asmstr = OpcStr # " \t$dst, $a, $b, $c;"; // assembly template shared by every operand form below
1402
+ def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c), // a, b and c are all registers
1403
+ asmstr,
1404
+ [(set RC:$dst, (fma RC:$a, RC:$b, RC:$c))]>,
1405
+ Requires<[Pred]>;
1406
+ def rri : NVPTXInst<(outs RC:$dst), // c is an immediate (ImmCls operand, fpimm in the pattern)
1407
+ (ins RC:$a, RC:$b, ImmCls:$c),
1408
+ asmstr,
1409
+ [(set RC:$dst, (fma RC:$a, RC:$b, fpimm:$c))]>,
1410
+ Requires<[Pred]>;
1411
+ def rir : NVPTXInst<(outs RC:$dst), // b is an immediate
1412
+ (ins RC:$a, ImmCls:$b, RC:$c),
1413
+ asmstr,
1414
+ [(set RC:$dst, (fma RC:$a, fpimm:$b, RC:$c))]>,
1415
+ Requires<[Pred]>;
1416
+ def rii : NVPTXInst<(outs RC:$dst), // b and c are immediates
1417
+ (ins RC:$a, ImmCls:$b, ImmCls:$c),
1418
+ asmstr,
1419
+ [(set RC:$dst, (fma RC:$a, fpimm:$b, fpimm:$c))]>,
1420
+ Requires<[Pred]>;
1421
+ def iir : NVPTXInst<(outs RC:$dst), // a and b are immediates; only c is a register
1422
+ (ins ImmCls:$a, ImmCls:$b, RC:$c),
1423
+ asmstr,
1424
+ [(set RC:$dst, (fma fpimm:$a, fpimm:$b, RC:$c))]>,
1425
+ Requires<[Pred]>;
1426
+
1414
1427
}
1415
1428
1416
1429
multiclass FMA_F16<string OpcStr, ValueType T, RegisterClass RC, Predicate Pred> {
0 commit comments