Skip to content

Commit ae73863

Browse files
committed
[DAGCombiner] Freeze maybe poison operands when folding select to logic
Just like for regular IR we need to treat SELECT as conditionally blocking poison. So (unless the condition itself is poison) the result is only poison if the selected true/false value is poison. Thus, when doing DAG combines that turn SELECT into arithmetic/logical operations (e.g. AND/OR) we need to make sure that the new operations aren't more poisonous. One way to do that is to use FREEZE to make sure the operands aren't posion. This patch aims at fixing the kind of miscompiles reported in #84653 and #85190 Solution is to make sure that we insert FREEZE, if needed to make the fold sound, when using the foldBoolSelectToLogic and foldVSelectToSignBitSplatMask DAG combines. This may result in some (hopefully minor) regressions since we lack some ways to fold away the freeze (or due to isGuaranteedNotToBePoison being too pessimistic). Focus in this patch is to just avoid miscompiles, but I think some of the regressions can be avoided by general improvements regarding poison/freeze handling in SelectionDAG.
1 parent d0d4003 commit ae73863

30 files changed

+543
-587
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11343,28 +11343,28 @@ static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
1134311343
if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
1134411344
return SDValue();
1134511345

11346-
// select Cond, Cond, F --> or Cond, F
11347-
// select Cond, 1, F --> or Cond, F
11346+
// select Cond, Cond, F --> or Cond, freeze(F)
11347+
// select Cond, 1, F --> or Cond, freeze(F)
1134811348
if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
11349-
return matcher.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
11349+
return matcher.getNode(ISD::OR, SDLoc(N), VT, Cond, DAG.getFreeze(F));
1135011350

1135111351
// select Cond, T, Cond --> and Cond, T
1135211352
// select Cond, T, 0 --> and Cond, T
1135311353
if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
11354-
return matcher.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
11354+
return matcher.getNode(ISD::AND, SDLoc(N), VT, Cond, DAG.getFreeze(T));
1135511355

1135611356
// select Cond, T, 1 --> or (not Cond), T
1135711357
if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
1135811358
SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
1135911359
DAG.getAllOnesConstant(SDLoc(N), VT));
11360-
return matcher.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
11360+
return matcher.getNode(ISD::OR, SDLoc(N), VT, NotCond, DAG.getFreeze(T));
1136111361
}
1136211362

1136311363
// select Cond, 0, F --> and (not Cond), F
1136411364
if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
1136511365
SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
1136611366
DAG.getAllOnesConstant(SDLoc(N), VT));
11367-
return matcher.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
11367+
return matcher.getNode(ISD::AND, SDLoc(N), VT, NotCond, DAG.getFreeze(F));
1136811368
}
1136911369

1137011370
return SDValue();
@@ -11398,15 +11398,15 @@ static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
1139811398
SDLoc DL(N);
1139911399
SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
1140011400
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11401-
return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
11401+
return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
1140211402
}
1140311403

1140411404
// (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
1140511405
if (isAllOnesOrAllOnesSplat(N1)) {
1140611406
SDLoc DL(N);
1140711407
SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
1140811408
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11409-
return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
11409+
return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
1141011410
}
1141111411

1141211412
// If we have to invert the sign bit mask, only do that transform if the
@@ -11418,7 +11418,7 @@ static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
1141811418
SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
1141911419
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
1142011420
SDValue Not = DAG.getNOT(DL, Sra, VT);
11421-
return DAG.getNode(ISD::AND, DL, VT, Not, N2);
11421+
return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
1142211422
}
1142311423

1142411424
// TODO: There's another pattern in this family, but it may require

llvm/test/CodeGen/AArch64/cmp-chains.ll

Lines changed: 69 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,14 @@
66

77
; (x0 < x1) && (x2 > x3)
88
define i32 @cmp_and2(i32 %0, i32 %1, i32 %2, i32 %3) {
9-
; SDISEL-LABEL: cmp_and2:
10-
; SDISEL: // %bb.0:
11-
; SDISEL-NEXT: cmp w0, w1
12-
; SDISEL-NEXT: ccmp w2, w3, #0, lo
13-
; SDISEL-NEXT: cset w0, hi
14-
; SDISEL-NEXT: ret
15-
;
16-
; GISEL-LABEL: cmp_and2:
17-
; GISEL: // %bb.0:
18-
; GISEL-NEXT: cmp w0, w1
19-
; GISEL-NEXT: cset w8, lo
20-
; GISEL-NEXT: cmp w2, w3
21-
; GISEL-NEXT: cset w9, hi
22-
; GISEL-NEXT: and w0, w8, w9
23-
; GISEL-NEXT: ret
9+
; CHECK-LABEL: cmp_and2:
10+
; CHECK: // %bb.0:
11+
; CHECK-NEXT: cmp w0, w1
12+
; CHECK-NEXT: cset w8, lo
13+
; CHECK-NEXT: cmp w2, w3
14+
; CHECK-NEXT: cset w9, hi
15+
; CHECK-NEXT: and w0, w8, w9
16+
; CHECK-NEXT: ret
2417
%5 = icmp ult i32 %0, %1
2518
%6 = icmp ugt i32 %2, %3
2619
%7 = select i1 %5, i1 %6, i1 false
@@ -30,25 +23,17 @@ define i32 @cmp_and2(i32 %0, i32 %1, i32 %2, i32 %3) {
3023

3124
; (x0 < x1) && (x2 > x3) && (x4 != x5)
3225
define i32 @cmp_and3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
33-
; SDISEL-LABEL: cmp_and3:
34-
; SDISEL: // %bb.0:
35-
; SDISEL-NEXT: cmp w0, w1
36-
; SDISEL-NEXT: ccmp w2, w3, #0, lo
37-
; SDISEL-NEXT: ccmp w4, w5, #4, hi
38-
; SDISEL-NEXT: cset w0, ne
39-
; SDISEL-NEXT: ret
40-
;
41-
; GISEL-LABEL: cmp_and3:
42-
; GISEL: // %bb.0:
43-
; GISEL-NEXT: cmp w0, w1
44-
; GISEL-NEXT: cset w8, lo
45-
; GISEL-NEXT: cmp w2, w3
46-
; GISEL-NEXT: cset w9, hi
47-
; GISEL-NEXT: cmp w4, w5
48-
; GISEL-NEXT: and w8, w8, w9
49-
; GISEL-NEXT: cset w9, ne
50-
; GISEL-NEXT: and w0, w8, w9
51-
; GISEL-NEXT: ret
26+
; CHECK-LABEL: cmp_and3:
27+
; CHECK: // %bb.0:
28+
; CHECK-NEXT: cmp w0, w1
29+
; CHECK-NEXT: cset w8, lo
30+
; CHECK-NEXT: cmp w2, w3
31+
; CHECK-NEXT: cset w9, hi
32+
; CHECK-NEXT: cmp w4, w5
33+
; CHECK-NEXT: and w8, w8, w9
34+
; CHECK-NEXT: cset w9, ne
35+
; CHECK-NEXT: and w0, w8, w9
36+
; CHECK-NEXT: ret
5237
%7 = icmp ult i32 %0, %1
5338
%8 = icmp ugt i32 %2, %3
5439
%9 = select i1 %7, i1 %8, i1 false
@@ -60,29 +45,20 @@ define i32 @cmp_and3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
6045

6146
; (x0 < x1) && (x2 > x3) && (x4 != x5) && (x6 == x7)
6247
define i32 @cmp_and4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
63-
; SDISEL-LABEL: cmp_and4:
64-
; SDISEL: // %bb.0:
65-
; SDISEL-NEXT: cmp w2, w3
66-
; SDISEL-NEXT: ccmp w0, w1, #2, hi
67-
; SDISEL-NEXT: ccmp w4, w5, #4, lo
68-
; SDISEL-NEXT: ccmp w6, w7, #0, ne
69-
; SDISEL-NEXT: cset w0, eq
70-
; SDISEL-NEXT: ret
71-
;
72-
; GISEL-LABEL: cmp_and4:
73-
; GISEL: // %bb.0:
74-
; GISEL-NEXT: cmp w2, w3
75-
; GISEL-NEXT: cset w8, hi
76-
; GISEL-NEXT: cmp w0, w1
77-
; GISEL-NEXT: cset w9, lo
78-
; GISEL-NEXT: cmp w4, w5
79-
; GISEL-NEXT: cset w10, ne
80-
; GISEL-NEXT: cmp w6, w7
81-
; GISEL-NEXT: and w8, w8, w9
82-
; GISEL-NEXT: cset w11, eq
83-
; GISEL-NEXT: and w9, w10, w11
84-
; GISEL-NEXT: and w0, w8, w9
85-
; GISEL-NEXT: ret
48+
; CHECK-LABEL: cmp_and4:
49+
; CHECK: // %bb.0:
50+
; CHECK-NEXT: cmp w2, w3
51+
; CHECK-NEXT: cset w8, hi
52+
; CHECK-NEXT: cmp w0, w1
53+
; CHECK-NEXT: cset w9, lo
54+
; CHECK-NEXT: cmp w4, w5
55+
; CHECK-NEXT: cset w10, ne
56+
; CHECK-NEXT: cmp w6, w7
57+
; CHECK-NEXT: and w8, w8, w9
58+
; CHECK-NEXT: cset w11, eq
59+
; CHECK-NEXT: and w9, w10, w11
60+
; CHECK-NEXT: and w0, w8, w9
61+
; CHECK-NEXT: ret
8662
%9 = icmp ugt i32 %2, %3
8763
%10 = icmp ult i32 %0, %1
8864
%11 = select i1 %9, i1 %10, i1 false
@@ -96,22 +72,15 @@ define i32 @cmp_and4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32
9672

9773
; (x0 < x1) || (x2 > x3)
9874
define i32 @cmp_or2(i32 %0, i32 %1, i32 %2, i32 %3) {
99-
; SDISEL-LABEL: cmp_or2:
100-
; SDISEL: // %bb.0:
101-
; SDISEL-NEXT: cmp w0, w1
102-
; SDISEL-NEXT: ccmp w2, w3, #0, hs
103-
; SDISEL-NEXT: cset w0, ne
104-
; SDISEL-NEXT: ret
105-
;
106-
; GISEL-LABEL: cmp_or2:
107-
; GISEL: // %bb.0:
108-
; GISEL-NEXT: cmp w0, w1
109-
; GISEL-NEXT: cset w8, lo
110-
; GISEL-NEXT: cmp w2, w3
111-
; GISEL-NEXT: cset w9, ne
112-
; GISEL-NEXT: orr w8, w8, w9
113-
; GISEL-NEXT: and w0, w8, #0x1
114-
; GISEL-NEXT: ret
75+
; CHECK-LABEL: cmp_or2:
76+
; CHECK: // %bb.0:
77+
; CHECK-NEXT: cmp w0, w1
78+
; CHECK-NEXT: cset w8, lo
79+
; CHECK-NEXT: cmp w2, w3
80+
; CHECK-NEXT: cset w9, ne
81+
; CHECK-NEXT: orr w8, w8, w9
82+
; CHECK-NEXT: and w0, w8, #0x1
83+
; CHECK-NEXT: ret
11584
%5 = icmp ult i32 %0, %1
11685
%6 = icmp ne i32 %2, %3
11786
%7 = select i1 %5, i1 true, i1 %6
@@ -121,26 +90,18 @@ define i32 @cmp_or2(i32 %0, i32 %1, i32 %2, i32 %3) {
12190

12291
; (x0 < x1) || (x2 > x3) || (x4 != x5)
12392
define i32 @cmp_or3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
124-
; SDISEL-LABEL: cmp_or3:
125-
; SDISEL: // %bb.0:
126-
; SDISEL-NEXT: cmp w0, w1
127-
; SDISEL-NEXT: ccmp w2, w3, #2, hs
128-
; SDISEL-NEXT: ccmp w4, w5, #0, ls
129-
; SDISEL-NEXT: cset w0, ne
130-
; SDISEL-NEXT: ret
131-
;
132-
; GISEL-LABEL: cmp_or3:
133-
; GISEL: // %bb.0:
134-
; GISEL-NEXT: cmp w0, w1
135-
; GISEL-NEXT: cset w8, lo
136-
; GISEL-NEXT: cmp w2, w3
137-
; GISEL-NEXT: cset w9, hi
138-
; GISEL-NEXT: cmp w4, w5
139-
; GISEL-NEXT: orr w8, w8, w9
140-
; GISEL-NEXT: cset w9, ne
141-
; GISEL-NEXT: orr w8, w8, w9
142-
; GISEL-NEXT: and w0, w8, #0x1
143-
; GISEL-NEXT: ret
93+
; CHECK-LABEL: cmp_or3:
94+
; CHECK: // %bb.0:
95+
; CHECK-NEXT: cmp w0, w1
96+
; CHECK-NEXT: cset w8, lo
97+
; CHECK-NEXT: cmp w2, w3
98+
; CHECK-NEXT: cset w9, hi
99+
; CHECK-NEXT: cmp w4, w5
100+
; CHECK-NEXT: orr w8, w8, w9
101+
; CHECK-NEXT: cset w9, ne
102+
; CHECK-NEXT: orr w8, w8, w9
103+
; CHECK-NEXT: and w0, w8, #0x1
104+
; CHECK-NEXT: ret
144105
%7 = icmp ult i32 %0, %1
145106
%8 = icmp ugt i32 %2, %3
146107
%9 = select i1 %7, i1 true, i1 %8
@@ -152,30 +113,21 @@ define i32 @cmp_or3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
152113

153114
; (x0 < x1) || (x2 > x3) || (x4 != x5) || (x6 == x7)
154115
define i32 @cmp_or4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
155-
; SDISEL-LABEL: cmp_or4:
156-
; SDISEL: // %bb.0:
157-
; SDISEL-NEXT: cmp w0, w1
158-
; SDISEL-NEXT: ccmp w2, w3, #2, hs
159-
; SDISEL-NEXT: ccmp w4, w5, #0, ls
160-
; SDISEL-NEXT: ccmp w6, w7, #4, eq
161-
; SDISEL-NEXT: cset w0, eq
162-
; SDISEL-NEXT: ret
163-
;
164-
; GISEL-LABEL: cmp_or4:
165-
; GISEL: // %bb.0:
166-
; GISEL-NEXT: cmp w0, w1
167-
; GISEL-NEXT: cset w8, lo
168-
; GISEL-NEXT: cmp w2, w3
169-
; GISEL-NEXT: cset w9, hi
170-
; GISEL-NEXT: cmp w4, w5
171-
; GISEL-NEXT: cset w10, ne
172-
; GISEL-NEXT: cmp w6, w7
173-
; GISEL-NEXT: orr w8, w8, w9
174-
; GISEL-NEXT: cset w11, eq
175-
; GISEL-NEXT: orr w9, w10, w11
176-
; GISEL-NEXT: orr w8, w8, w9
177-
; GISEL-NEXT: and w0, w8, #0x1
178-
; GISEL-NEXT: ret
116+
; CHECK-LABEL: cmp_or4:
117+
; CHECK: // %bb.0:
118+
; CHECK-NEXT: cmp w0, w1
119+
; CHECK-NEXT: cset w8, lo
120+
; CHECK-NEXT: cmp w2, w3
121+
; CHECK-NEXT: cset w9, hi
122+
; CHECK-NEXT: cmp w4, w5
123+
; CHECK-NEXT: cset w10, ne
124+
; CHECK-NEXT: cmp w6, w7
125+
; CHECK-NEXT: orr w8, w8, w9
126+
; CHECK-NEXT: cset w11, eq
127+
; CHECK-NEXT: orr w9, w10, w11
128+
; CHECK-NEXT: orr w8, w8, w9
129+
; CHECK-NEXT: and w0, w8, #0x1
130+
; CHECK-NEXT: ret
179131
%9 = icmp ult i32 %0, %1
180132
%10 = icmp ugt i32 %2, %3
181133
%11 = select i1 %9, i1 true, i1 %10
@@ -242,5 +194,3 @@ define i32 @true_or3(i32 %0, i32 %1, i32 %2) {
242194
%9 = zext i1 %8 to i32
243195
ret i32 %9
244196
}
245-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
246-
; CHECK: {{.*}}

llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,8 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
236236
; CHECK-NEXT: mov z7.d, z0.d
237237
; CHECK-NEXT: add x9, x9, x11
238238
; CHECK-NEXT: add x8, x8, x12
239-
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
239+
; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0
240+
; CHECK-NEXT: and p1.b, p1/z, p1.b, p2.b
240241
; CHECK-NEXT: zip2 p3.d, p1.d, p1.d
241242
; CHECK-NEXT: zip1 p2.d, p1.d, p1.d
242243
; CHECK-NEXT: whilelo p1.d, x9, x10

llvm/test/CodeGen/AArch64/fast-isel-select.ll

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
2-
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
3-
; RUN: llc -mtriple=aarch64-apple-darwin -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefix=GISEL
1+
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,SISEL
2+
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,FISEL
3+
; RUN: llc -mtriple=aarch64-apple-darwin -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GISEL
44

55
; First test the different supported value types for select.
66
define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
@@ -295,22 +295,28 @@ define float @select_icmp_sle(i32 %x, i32 %y, float %a, float %b) {
295295
; Test peephole optimizations for select.
296296
define zeroext i1 @select_opt1(i1 zeroext %c, i1 zeroext %a) {
297297
; CHECK-LABEL: select_opt1
298-
; CHECK: orr {{w[0-9]+}}, w0, w1
298+
; SISEL: orr [[REG:w[0-9]+]], w0, w1
299+
; SISEL: and w0, [[REG]], #0x1
300+
; FISEL: orr {{w[0-9]+}}, w0, w1
299301
%1 = select i1 %c, i1 true, i1 %a
300302
ret i1 %1
301303
}
302304

303305
define zeroext i1 @select_opt2(i1 zeroext %c, i1 zeroext %a) {
304306
; CHECK-LABEL: select_opt2
305-
; CHECK: eor [[REG:w[0-9]+]], w0, #0x1
306-
; CHECK: orr {{w[0-9]+}}, [[REG]], w1
307+
; SISEL: orn [[REG:w[0-9]+]], w1, w0
308+
; SISEL: and w0, [[REG]], #0x1
309+
; FISEL: eor [[REG:w[0-9]+]], w0, #0x1
310+
; FISEL: orr {{w[0-9]+}}, [[REG]], w1
307311
%1 = select i1 %c, i1 %a, i1 true
308312
ret i1 %1
309313
}
310314

311315
define zeroext i1 @select_opt3(i1 zeroext %c, i1 zeroext %a) {
312316
; CHECK-LABEL: select_opt3
313-
; CHECK: bic {{w[0-9]+}}, w1, w0
317+
; SISEL: eor [[REG:w[0-9]+]], w0, #0x1
318+
; SISEL: and w0, [[REG]], w1
319+
; FISEL: bic {{w[0-9]+}}, w1, w0
314320
%1 = select i1 %c, i1 false, i1 %a
315321
ret i1 %1
316322
}

llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,8 @@ define i32 @ctz_and_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vsca
213213
; CHECK-LABEL: ctz_and_nxv16i1:
214214
; CHECK: // %bb.0:
215215
; CHECK-NEXT: ptrue p1.b
216-
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.b
216+
; CHECK-NEXT: cmpne p2.b, p1/z, z0.b, z1.b
217+
; CHECK-NEXT: and p0.b, p0/z, p0.b, p2.b
217218
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
218219
; CHECK-NEXT: cntp x0, p0, p0.b
219220
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0

0 commit comments

Comments
 (0)