Skip to content

Commit 593e953

Browse files
committed
[NVPTX] Fix lowering of i1 SETCC - address comments
1 parent ec066d3 commit 593e953

File tree

4 files changed

+245
-2
lines changed

4 files changed

+245
-2
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18754,8 +18754,9 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) {
1875418754
if (LegalTypes)
1875518755
SetCCVT = getSetCCResultType(SetCCVT);
1875618756
// Replace the uses of XOR with SETCC
18757-
return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
18758-
Equal ? ISD::SETEQ : ISD::SETNE);
18757+
const ISD::CondCode CC = Equal ? ISD::SETEQ : ISD::SETNE;
18758+
if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType()))
18759+
return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, CC);
1875918760
}
1876018761
}
1876118762

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
668668
setTruncStoreAction(VT, MVT::i1, Expand);
669669
}
670670

671+
setCondCodeAction({ISD::SETNE, ISD::SETEQ, ISD::SETUGE, ISD::SETULE,
672+
ISD::SETUGT, ISD::SETULT, ISD::SETGT, ISD::SETLT,
673+
ISD::SETGE, ISD::SETLE},
674+
MVT::i1, Custom);
675+
671676
// expand extload of vector of integers.
672677
setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i16,
673678
MVT::v2i8, Expand);
@@ -2666,6 +2671,46 @@ SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
26662671
}
26672672
}
26682673

2674+
// Lowers SETCC nodes that aren't directly supported by our arch.
//
// Only comparisons whose operands are i1 are rewritten here; for any other
// operand type an empty SDValue is returned. The comparison is replaced by
// XOR/AND/OR/NOT nodes on the two i1 operands, selected by the condition
// code, and the i1 result is then zero-extended or truncated to the result
// type of the original node.
2675+
SDValue NVPTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2676+
SDValue L = Op->getOperand(0);
2677+
SDValue R = Op->getOperand(1);
2678+
2679+
// Operands of any type other than i1 are not handled by this routine.
if (L.getValueType() != MVT::i1)
2680+
return SDValue();
2681+
2682+
SDLoc DL(Op);
2683+
SDValue Ret;
2684+
// Operand 2 of the node carries the condition code.
switch (cast<CondCodeSDNode>(Op->getOperand(2))->get()) {
2685+
default:
2686+
llvm_unreachable("Unknown integer setcc!");
2687+
case ISD::SETEQ: // X == Y --> ~(X^Y)
2688+
Ret = DAG.getNOT(DL, DAG.getNode(ISD::XOR, DL, MVT::i1, L, R), MVT::i1);
2689+
break;
2690+
case ISD::SETNE: // X != Y --> (X^Y)
2691+
Ret = DAG.getNode(ISD::XOR, DL, MVT::i1, L, R);
2692+
break;
2693+
// In the signed cases below a set i1 bit reads as -1, so the operand with
// its bit set is the smaller one; this is why each signed predicate shares
// its lowering with the opposite-direction unsigned predicate.
case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
2694+
case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
2695+
Ret = DAG.getNode(ISD::AND, DL, MVT::i1, R, DAG.getNOT(DL, L, MVT::i1));
2696+
break;
2697+
case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
2698+
case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
2699+
Ret = DAG.getNode(ISD::AND, DL, MVT::i1, L, DAG.getNOT(DL, R, MVT::i1));
2700+
break;
2701+
case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
2702+
case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
2703+
Ret = DAG.getNode(ISD::OR, DL, MVT::i1, R, DAG.getNOT(DL, L, MVT::i1));
2704+
break;
2705+
case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
2706+
case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
2707+
Ret = DAG.getNode(ISD::OR, DL, MVT::i1, L, DAG.getNOT(DL, R, MVT::i1));
2708+
break;
2709+
}
2710+
2711+
// Ret was built as an i1; convert it to the result type of the original node.
return DAG.getZExtOrTrunc(Ret, DL, Op.getValueType());
2711+
}
2713+
26692714
/// If the types match, convert the generic copysign to the NVPTXISD version,
26702715
/// otherwise bail, ensuring that mismatched cases are properly expanded.
26712716
SDValue NVPTXTargetLowering::LowerFCOPYSIGN(SDValue Op,
@@ -2919,6 +2964,8 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
29192964
return LowerSTORE(Op, DAG);
29202965
case ISD::LOAD:
29212966
return LowerLOAD(Op, DAG);
2967+
case ISD::SETCC:
2968+
return LowerSETCC(Op, DAG);
29222969
case ISD::SHL_PARTS:
29232970
return LowerShiftLeftParts(Op, DAG);
29242971
case ISD::SRA_PARTS:

llvm/lib/Target/NVPTX/NVPTXISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,8 @@ class NVPTXTargetLowering : public TargetLowering {
628628
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
629629
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
630630

631+
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
632+
631633
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
632634

633635
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;

llvm/test/CodeGen/NVPTX/i1-icmp.ll

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
3+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
4+
5+
target triple = "nvptx-nvidia-cuda"
6+
7+
define i32 @icmp_i1_eq(i32 %a, i32 %b) {
8+
; CHECK-LABEL: icmp_i1_eq(
9+
; CHECK: {
10+
; CHECK-NEXT: .reg .pred %p<4>;
11+
; CHECK-NEXT: .reg .b32 %r<5>;
12+
; CHECK-EMPTY:
13+
; CHECK-NEXT: // %bb.0:
14+
; CHECK-NEXT: ld.param.u32 %r1, [icmp_i1_eq_param_0];
15+
; CHECK-NEXT: setp.gt.s32 %p1, %r1, 1;
16+
; CHECK-NEXT: ld.param.u32 %r2, [icmp_i1_eq_param_1];
17+
; CHECK-NEXT: setp.gt.s32 %p2, %r2, 1;
18+
; CHECK-NEXT: xor.pred %p3, %p1, %p2;
19+
; CHECK-NEXT: @%p3 bra $L__BB0_2;
20+
; CHECK-NEXT: // %bb.1: // %bb1
21+
; CHECK-NEXT: mov.b32 %r4, 1;
22+
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
23+
; CHECK-NEXT: ret;
24+
; CHECK-NEXT: $L__BB0_2: // %bb2
25+
; CHECK-NEXT: mov.b32 %r3, 127;
26+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
27+
; CHECK-NEXT: ret;
28+
%p1 = icmp sgt i32 %a, 1
29+
%p2 = icmp sgt i32 %b, 1
30+
%c = icmp eq i1 %p1, %p2
31+
br i1 %c, label %bb1, label %bb2
32+
bb1:
33+
ret i32 1
34+
bb2:
35+
ret i32 127
36+
}
37+
38+
define i32 @icmp_i1_ne(i32 %a, i32 %b) {
39+
; CHECK-LABEL: icmp_i1_ne(
40+
; CHECK: {
41+
; CHECK-NEXT: .reg .pred %p<5>;
42+
; CHECK-NEXT: .reg .b32 %r<5>;
43+
; CHECK-EMPTY:
44+
; CHECK-NEXT: // %bb.0:
45+
; CHECK-NEXT: ld.param.u32 %r1, [icmp_i1_ne_param_0];
46+
; CHECK-NEXT: setp.gt.s32 %p1, %r1, 1;
47+
; CHECK-NEXT: ld.param.u32 %r2, [icmp_i1_ne_param_1];
48+
; CHECK-NEXT: setp.gt.s32 %p2, %r2, 1;
49+
; CHECK-NEXT: xor.pred %p3, %p1, %p2;
50+
; CHECK-NEXT: not.pred %p4, %p3;
51+
; CHECK-NEXT: @%p4 bra $L__BB1_2;
52+
; CHECK-NEXT: // %bb.1: // %bb1
53+
; CHECK-NEXT: mov.b32 %r4, 1;
54+
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
55+
; CHECK-NEXT: ret;
56+
; CHECK-NEXT: $L__BB1_2: // %bb2
57+
; CHECK-NEXT: mov.b32 %r3, 127;
58+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
59+
; CHECK-NEXT: ret;
60+
%p1 = icmp sgt i32 %a, 1
61+
%p2 = icmp sgt i32 %b, 1
62+
%c = icmp ne i1 %p1, %p2
63+
br i1 %c, label %bb1, label %bb2
64+
bb1:
65+
ret i32 1
66+
bb2:
67+
ret i32 127
68+
}
69+
70+
define i32 @icmp_i1_sgt(i32 %a, i32 %b) {
71+
; CHECK-LABEL: icmp_i1_sgt(
72+
; CHECK: {
73+
; CHECK-NEXT: .reg .pred %p<4>;
74+
; CHECK-NEXT: .reg .b32 %r<5>;
75+
; CHECK-EMPTY:
76+
; CHECK-NEXT: // %bb.0:
77+
; CHECK-NEXT: ld.param.u32 %r1, [icmp_i1_sgt_param_0];
78+
; CHECK-NEXT: setp.gt.s32 %p1, %r1, 1;
79+
; CHECK-NEXT: ld.param.u32 %r2, [icmp_i1_sgt_param_1];
80+
; CHECK-NEXT: setp.lt.s32 %p2, %r2, 2;
81+
; CHECK-NEXT: or.pred %p3, %p1, %p2;
82+
; CHECK-NEXT: @%p3 bra $L__BB2_2;
83+
; CHECK-NEXT: // %bb.1: // %bb1
84+
; CHECK-NEXT: mov.b32 %r4, 1;
85+
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
86+
; CHECK-NEXT: ret;
87+
; CHECK-NEXT: $L__BB2_2: // %bb2
88+
; CHECK-NEXT: mov.b32 %r3, 127;
89+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
90+
; CHECK-NEXT: ret;
91+
%p1 = icmp sgt i32 %a, 1
92+
%p2 = icmp sgt i32 %b, 1
93+
%c = icmp sgt i1 %p1, %p2
94+
br i1 %c, label %bb1, label %bb2
95+
bb1:
96+
ret i32 1
97+
bb2:
98+
ret i32 127
99+
}
100+
101+
define i32 @icmp_i1_slt(i32 %a, i32 %b) {
102+
; CHECK-LABEL: icmp_i1_slt(
103+
; CHECK: {
104+
; CHECK-NEXT: .reg .pred %p<4>;
105+
; CHECK-NEXT: .reg .b32 %r<5>;
106+
; CHECK-EMPTY:
107+
; CHECK-NEXT: // %bb.0:
108+
; CHECK-NEXT: ld.param.u32 %r1, [icmp_i1_slt_param_0];
109+
; CHECK-NEXT: setp.lt.s32 %p1, %r1, 2;
110+
; CHECK-NEXT: ld.param.u32 %r2, [icmp_i1_slt_param_1];
111+
; CHECK-NEXT: setp.gt.s32 %p2, %r2, 1;
112+
; CHECK-NEXT: or.pred %p3, %p2, %p1;
113+
; CHECK-NEXT: @%p3 bra $L__BB3_2;
114+
; CHECK-NEXT: // %bb.1: // %bb1
115+
; CHECK-NEXT: mov.b32 %r4, 1;
116+
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
117+
; CHECK-NEXT: ret;
118+
; CHECK-NEXT: $L__BB3_2: // %bb2
119+
; CHECK-NEXT: mov.b32 %r3, 127;
120+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
121+
; CHECK-NEXT: ret;
122+
%p1 = icmp sgt i32 %a, 1
123+
%p2 = icmp sgt i32 %b, 1
124+
%c = icmp slt i1 %p1, %p2
125+
br i1 %c, label %bb1, label %bb2
126+
bb1:
127+
ret i32 1
128+
bb2:
129+
ret i32 127
130+
}
131+
132+
define i32 @icmp_i1_sge(i32 %a, i32 %b) {
133+
; CHECK-LABEL: icmp_i1_sge(
134+
; CHECK: {
135+
; CHECK-NEXT: .reg .pred %p<4>;
136+
; CHECK-NEXT: .reg .b32 %r<5>;
137+
; CHECK-EMPTY:
138+
; CHECK-NEXT: // %bb.0:
139+
; CHECK-NEXT: ld.param.u32 %r1, [icmp_i1_sge_param_0];
140+
; CHECK-NEXT: setp.gt.s32 %p1, %r1, 1;
141+
; CHECK-NEXT: ld.param.u32 %r2, [icmp_i1_sge_param_1];
142+
; CHECK-NEXT: setp.lt.s32 %p2, %r2, 2;
143+
; CHECK-NEXT: and.pred %p3, %p1, %p2;
144+
; CHECK-NEXT: @%p3 bra $L__BB4_2;
145+
; CHECK-NEXT: // %bb.1: // %bb1
146+
; CHECK-NEXT: mov.b32 %r4, 1;
147+
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
148+
; CHECK-NEXT: ret;
149+
; CHECK-NEXT: $L__BB4_2: // %bb2
150+
; CHECK-NEXT: mov.b32 %r3, 127;
151+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
152+
; CHECK-NEXT: ret;
153+
%p1 = icmp sgt i32 %a, 1
154+
%p2 = icmp sgt i32 %b, 1
155+
%c = icmp sge i1 %p1, %p2
156+
br i1 %c, label %bb1, label %bb2
157+
bb1:
158+
ret i32 1
159+
bb2:
160+
ret i32 127
161+
}
162+
163+
define i32 @icmp_i1_sle(i32 %a, i32 %b) {
164+
; CHECK-LABEL: icmp_i1_sle(
165+
; CHECK: {
166+
; CHECK-NEXT: .reg .pred %p<4>;
167+
; CHECK-NEXT: .reg .b32 %r<5>;
168+
; CHECK-EMPTY:
169+
; CHECK-NEXT: // %bb.0:
170+
; CHECK-NEXT: ld.param.u32 %r1, [icmp_i1_sle_param_0];
171+
; CHECK-NEXT: setp.lt.s32 %p1, %r1, 2;
172+
; CHECK-NEXT: ld.param.u32 %r2, [icmp_i1_sle_param_1];
173+
; CHECK-NEXT: setp.gt.s32 %p2, %r2, 1;
174+
; CHECK-NEXT: and.pred %p3, %p2, %p1;
175+
; CHECK-NEXT: @%p3 bra $L__BB5_2;
176+
; CHECK-NEXT: // %bb.1: // %bb1
177+
; CHECK-NEXT: mov.b32 %r4, 1;
178+
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
179+
; CHECK-NEXT: ret;
180+
; CHECK-NEXT: $L__BB5_2: // %bb2
181+
; CHECK-NEXT: mov.b32 %r3, 127;
182+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
183+
; CHECK-NEXT: ret;
184+
%p1 = icmp sgt i32 %a, 1
185+
%p2 = icmp sgt i32 %b, 1
186+
%c = icmp sle i1 %p1, %p2
187+
br i1 %c, label %bb1, label %bb2
188+
bb1:
189+
ret i32 1
190+
bb2:
191+
ret i32 127
192+
}
193+

0 commit comments

Comments
 (0)