Skip to content

Commit b241821

Browse files
Artem-B authored and thomasfaingnaert committed
[NVPTX] Remove few more unneeded fp16 instruction variants
Differential Revision: https://reviews.llvm.org/D152478
1 parent 483f1db commit b241821

File tree

4 files changed

+15
-64
lines changed

4 files changed

+15
-64
lines changed

llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -60,12 +60,6 @@ void NVPTXInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
6060
case 6:
6161
OS << "%fd";
6262
break;
63-
case 7:
64-
OS << "%h";
65-
break;
66-
case 8:
67-
OS << "%hh";
68-
break;
6963
}
7064

7165
unsigned VReg = Reg.id() & 0x0FFFFFFF;

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 2 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -638,18 +638,10 @@ bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
638638
if (E0.empty() || E1.empty())
639639
return false;
640640

641-
unsigned Op = NVPTX::SplitF16x2;
642-
// If the vector has been BITCAST'ed from i32, we can use original
643-
// value directly and avoid register-to-register move.
644-
SDValue Source = Vector;
645-
if (Vector->getOpcode() == ISD::BITCAST) {
646-
Op = NVPTX::SplitI32toF16x2;
647-
Source = Vector->getOperand(0);
648-
}
649641
// Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
650642
// into f16,f16 SplitF16x2(V)
651-
SDNode *ScatterOp =
652-
CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source);
643+
SDNode *ScatterOp = CurDAG->getMachineNode(NVPTX::I32toV2I16, SDLoc(N),
644+
MVT::f16, MVT::f16, Vector);
653645
for (auto *Node : E0)
654646
ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
655647
for (auto *Node : E1)

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 10 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -2665,8 +2665,6 @@ let mayLoad=1, hasSideEffects=0 in {
26652665
defm LDV_i16 : LD_VEC<Int16Regs>;
26662666
defm LDV_i32 : LD_VEC<Int32Regs>;
26672667
defm LDV_i64 : LD_VEC<Int64Regs>;
2668-
defm LDV_f16 : LD_VEC<Int16Regs>;
2669-
defm LDV_f16x2 : LD_VEC<Int32Regs>;
26702668
defm LDV_f32 : LD_VEC<Float32Regs>;
26712669
defm LDV_f64 : LD_VEC<Float64Regs>;
26722670
}
@@ -2760,8 +2758,6 @@ let mayStore=1, hasSideEffects=0 in {
27602758
defm STV_i16 : ST_VEC<Int16Regs>;
27612759
defm STV_i32 : ST_VEC<Int32Regs>;
27622760
defm STV_i64 : ST_VEC<Int64Regs>;
2763-
defm STV_f16 : ST_VEC<Int16Regs>;
2764-
defm STV_f16x2 : ST_VEC<Int32Regs>;
27652761
defm STV_f32 : ST_VEC<Float32Regs>;
27662762
defm STV_f64 : ST_VEC<Float64Regs>;
27672763
}
@@ -3074,6 +3070,10 @@ let hasSideEffects = false in {
30743070
(ins Int32Regs:$s),
30753071
"{{ .reg .b16 tmp; mov.b32 {tmp, $high}, $s; }}",
30763072
[]>;
3073+
def I32toI16L : NVPTXInst<(outs Int16Regs:$low),
3074+
(ins Int32Regs:$s),
3075+
"{{ .reg .b16 tmp; mov.b32 {$low, tmp}, $s; }}",
3076+
[]>;
30773077
def I64toI32H : NVPTXInst<(outs Int32Regs:$high),
30783078
(ins Int64Regs:$s),
30793079
"{{ .reg .b32 tmp; mov.b64 {tmp, $high}, $s; }}",
@@ -3091,47 +3091,12 @@ def : Pat<(i32 (trunc (srl Int64Regs:$s, (i32 32)))),
30913091
def : Pat<(i32 (trunc (sra Int64Regs:$s, (i32 32)))),
30923092
(I64toI32H Int64Regs:$s)>;
30933093

3094-
let hasSideEffects = false in {
3095-
// Extract element of f16x2 register. PTX does not provide any way
3096-
// to access elements of f16x2 vector directly, so we need to
3097-
// extract it using a temporary register.
3098-
def F16x2toF16_0 : NVPTXInst<(outs Int16Regs:$dst),
3099-
(ins Int32Regs:$src),
3100-
"{{ .reg .b16 \t%tmp_hi;\n\t"
3101-
" mov.b32 \t{$dst, %tmp_hi}, $src; }}",
3102-
[(set Int16Regs:$dst,
3103-
(extractelt (v2f16 Int32Regs:$src), 0))]>;
3104-
def F16x2toF16_1 : NVPTXInst<(outs Int16Regs:$dst),
3105-
(ins Int32Regs:$src),
3106-
"{{ .reg .b16 \t%tmp_lo;\n\t"
3107-
" mov.b32 \t{%tmp_lo, $dst}, $src; }}",
3108-
[(set Int16Regs:$dst,
3109-
(extractelt (v2f16 Int32Regs:$src), 1))]>;
3110-
3111-
// Coalesce two f16 registers into f16x2
3112-
def BuildF16x2 : NVPTXInst<(outs Int32Regs:$dst),
3113-
(ins Int16Regs:$a, Int16Regs:$b),
3114-
"mov.b32 \t$dst, {{$a, $b}};",
3115-
[(set (v2f16 Int32Regs:$dst),
3116-
(build_vector (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>;
3117-
3118-
// Directly initializing underlying the b32 register is one less SASS
3119-
// instruction than than vector-packing move.
3120-
def BuildF16x2i : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
3121-
"mov.b32 \t$dst, $src;",
3122-
[]>;
3123-
3124-
// Split f16x2 into two f16 registers.
3125-
def SplitF16x2 : NVPTXInst<(outs Int16Regs:$lo, Int16Regs:$hi),
3126-
(ins Int32Regs:$src),
3127-
"mov.b32 \t{{$lo, $hi}}, $src;",
3128-
[]>;
3129-
// Split an i32 into two f16
3130-
def SplitI32toF16x2 : NVPTXInst<(outs Int16Regs:$lo, Int16Regs:$hi),
3131-
(ins Int32Regs:$src),
3132-
"mov.b32 \t{{$lo, $hi}}, $src;",
3133-
[]>;
3134-
}
3094+
def : Pat<(f16 (extractelt (v2f16 Int32Regs:$src), 0)),
3095+
(I32toI16L Int32Regs:$src)>;
3096+
def : Pat<(f16 (extractelt (v2f16 Int32Regs:$src), 1)),
3097+
(I32toI16H Int32Regs:$src)>;
3098+
def : Pat<(v2f16 (build_vector (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
3099+
(V2I16toI32 Int16Regs:$a, Int16Regs:$b)>;
31353100

31363101
// Count leading zeros
31373102
let hasSideEffects = false in {

llvm/test/CodeGen/NVPTX/f16x2-instructions.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ define <2 x half> @test_ret_const() #0 {
4040

4141
; CHECK-LABEL: test_extract_0(
4242
; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_extract_0_param_0];
43-
; CHECK: mov.b32 {[[R:%rs[0-9]+]], %tmp_hi}, [[A]];
43+
; CHECK: mov.b32 {[[R:%rs[0-9]+]], tmp}, [[A]];
4444
; CHECK: st.param.b16 [func_retval0+0], [[R]];
4545
; CHECK: ret;
4646
define half @test_extract_0(<2 x half> %a) #0 {
@@ -50,7 +50,7 @@ define half @test_extract_0(<2 x half> %a) #0 {
5050

5151
; CHECK-LABEL: test_extract_1(
5252
; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_extract_1_param_0];
53-
; CHECK: mov.b32 {%tmp_lo, [[R:%rs[0-9]+]]}, [[A]];
53+
; CHECK: mov.b32 {tmp, [[R:%rs[0-9]+]]}, [[A]];
5454
; CHECK: st.param.b16 [func_retval0+0], [[R]];
5555
; CHECK: ret;
5656
define half @test_extract_1(<2 x half> %a) #0 {
@@ -1468,7 +1468,7 @@ define <2 x half> @test_shufflevector(<2 x half> %a) #0 {
14681468
}
14691469

14701470
; CHECK-LABEL: test_insertelement(
1471-
; CHECK: mov.b32 {%rs2, %tmp_hi}, %r1;
1471+
; CHECK: mov.b32 {%rs2, tmp}, %r1;
14721472
; CHECK: mov.b32 %r2, {%rs2, %rs1};
14731473
define <2 x half> @test_insertelement(<2 x half> %a, half %x) #0 {
14741474
%i = insertelement <2 x half> %a, half %x, i64 1

0 commit comments

Comments
 (0)