Skip to content

Commit f152f4f

Browse files
committed
[DAG] Always allow folding XOR patterns to ABS pre-legalization
1 parent 64c8b66 commit f152f4f

15 files changed: 2,862 additions and 3,016 deletions.

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4042,7 +4042,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
40424042
}
40434043

40444044
// fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
4045-
if (hasOperation(ISD::ABS, VT) &&
4045+
if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
40464046
sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
40474047
sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
40484048
return DAG.getNode(ISD::ABS, DL, VT, A);
@@ -9526,7 +9526,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
95269526
}
95279527

95289528
// fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
9529-
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
9529+
if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
95309530
SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
95319531
SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
95329532
if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll

Lines changed: 24 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -135,31 +135,31 @@ define i32 @select_sdiv_lhs_opaque_const0_i32(i1 %cond) {
135135
; GCN-NEXT: s_waitcnt lgkmcnt(0)
136136
; GCN-NEXT: v_mov_b32_e32 v1, s4
137137
; GCN-NEXT: v_cndmask_b32_e32 v0, 5, v1, vcc
138-
; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
139-
; GCN-NEXT: v_add_u32_e32 v0, vcc, v0, v1
140-
; GCN-NEXT: v_xor_b32_e32 v0, v0, v1
141-
; GCN-NEXT: v_cvt_f32_u32_e32 v2, v0
142-
; GCN-NEXT: v_sub_u32_e32 v3, vcc, 0, v0
138+
; GCN-NEXT: v_sub_u32_e32 v1, vcc, 0, v0
139+
; GCN-NEXT: v_max_i32_e32 v1, v0, v1
140+
; GCN-NEXT: v_cvt_f32_u32_e32 v2, v1
141+
; GCN-NEXT: v_sub_u32_e32 v3, vcc, 0, v1
143142
; GCN-NEXT: s_mov_b32 s4, 0xf4240
144143
; GCN-NEXT: v_rcp_iflag_f32_e32 v2, v2
144+
; GCN-NEXT: v_ashrrev_i32_e32 v0, 31, v0
145145
; GCN-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
146146
; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2
147147
; GCN-NEXT: v_mul_lo_u32 v3, v3, v2
148148
; GCN-NEXT: v_mul_hi_u32 v3, v2, v3
149149
; GCN-NEXT: v_add_u32_e32 v2, vcc, v2, v3
150150
; GCN-NEXT: v_mul_hi_u32 v2, v2, s4
151-
; GCN-NEXT: v_mul_lo_u32 v3, v2, v0
151+
; GCN-NEXT: v_mul_lo_u32 v3, v2, v1
152152
; GCN-NEXT: v_add_u32_e32 v4, vcc, 1, v2
153153
; GCN-NEXT: v_sub_u32_e32 v3, vcc, 0xf4240, v3
154-
; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v3, v0
154+
; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1
155155
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
156-
; GCN-NEXT: v_sub_u32_e64 v4, s[4:5], v3, v0
156+
; GCN-NEXT: v_sub_u32_e64 v4, s[4:5], v3, v1
157157
; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
158158
; GCN-NEXT: v_add_u32_e32 v4, vcc, 1, v2
159-
; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v3, v0
160-
; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc
161-
; GCN-NEXT: v_xor_b32_e32 v0, v0, v1
162-
; GCN-NEXT: v_sub_u32_e32 v0, vcc, v0, v1
159+
; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1
160+
; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc
161+
; GCN-NEXT: v_xor_b32_e32 v1, v1, v0
162+
; GCN-NEXT: v_sub_u32_e32 v0, vcc, v1, v0
163163
; GCN-NEXT: s_setpc_b64 s[30:31]
164164
%select = select i1 %cond, i32 ptrtoint (ptr addrspace(1) @gv to i32), i32 5
165165
%op = sdiv i32 1000000, %select
@@ -217,31 +217,31 @@ define i32 @select_sdiv_lhs_opaque_const1_i32(i1 %cond) {
217217
; GCN-NEXT: s_waitcnt lgkmcnt(0)
218218
; GCN-NEXT: v_mov_b32_e32 v1, s4
219219
; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 5, vcc
220-
; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
221-
; GCN-NEXT: v_add_u32_e32 v0, vcc, v0, v1
222-
; GCN-NEXT: v_xor_b32_e32 v0, v0, v1
223-
; GCN-NEXT: v_cvt_f32_u32_e32 v2, v0
224-
; GCN-NEXT: v_sub_u32_e32 v3, vcc, 0, v0
220+
; GCN-NEXT: v_sub_u32_e32 v1, vcc, 0, v0
221+
; GCN-NEXT: v_max_i32_e32 v1, v0, v1
222+
; GCN-NEXT: v_cvt_f32_u32_e32 v2, v1
223+
; GCN-NEXT: v_sub_u32_e32 v3, vcc, 0, v1
225224
; GCN-NEXT: s_mov_b32 s4, 0xf4240
226225
; GCN-NEXT: v_rcp_iflag_f32_e32 v2, v2
226+
; GCN-NEXT: v_ashrrev_i32_e32 v0, 31, v0
227227
; GCN-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
228228
; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2
229229
; GCN-NEXT: v_mul_lo_u32 v3, v3, v2
230230
; GCN-NEXT: v_mul_hi_u32 v3, v2, v3
231231
; GCN-NEXT: v_add_u32_e32 v2, vcc, v2, v3
232232
; GCN-NEXT: v_mul_hi_u32 v2, v2, s4
233-
; GCN-NEXT: v_mul_lo_u32 v3, v2, v0
233+
; GCN-NEXT: v_mul_lo_u32 v3, v2, v1
234234
; GCN-NEXT: v_add_u32_e32 v4, vcc, 1, v2
235235
; GCN-NEXT: v_sub_u32_e32 v3, vcc, 0xf4240, v3
236-
; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v3, v0
236+
; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1
237237
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
238-
; GCN-NEXT: v_sub_u32_e64 v4, s[4:5], v3, v0
238+
; GCN-NEXT: v_sub_u32_e64 v4, s[4:5], v3, v1
239239
; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
240240
; GCN-NEXT: v_add_u32_e32 v4, vcc, 1, v2
241-
; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v3, v0
242-
; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc
243-
; GCN-NEXT: v_xor_b32_e32 v0, v0, v1
244-
; GCN-NEXT: v_sub_u32_e32 v0, vcc, v0, v1
241+
; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1
242+
; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc
243+
; GCN-NEXT: v_xor_b32_e32 v1, v1, v0
244+
; GCN-NEXT: v_sub_u32_e32 v0, vcc, v1, v0
245245
; GCN-NEXT: s_setpc_b64 s[30:31]
246246
%select = select i1 %cond, i32 5, i32 ptrtoint (ptr addrspace(1) @gv to i32)
247247
%op = sdiv i32 1000000, %select

0 commit comments

Comments
 (0)