Skip to content

Commit a42aa8f

Browse files
committed
[SLP]Fix adjusting of the mask for the fully matched nodes.
When checking for poison elements in the matched node, the register number must be taken into account when clearing the corresponding mask element. Fixes #119393
1 parent 13539c2 commit a42aa8f

File tree

2 files changed

+85
-2
lines changed

2 files changed

+85
-2
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13222,9 +13222,9 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1322213222
copy(CommonMask, Mask.begin());
1322313223
}
1322413224
// Clear undef scalars.
13225-
for (int I = 0, Sz = VL.size(); I < Sz; ++I)
13225+
for (unsigned I : seq<unsigned>(VL.size()))
1322613226
if (isa<PoisonValue>(VL[I]))
13227-
Mask[I] = PoisonMaskElem;
13227+
Mask[Part * VL.size() + I] = PoisonMaskElem;
1322813228
return TargetTransformInfo::SK_PermuteSingleSrc;
1322913229
}
1323013230
// No perfect match, just shuffle, so choose the first tree node from the
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-200 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define i32 @test() {
5+
; CHECK-LABEL: define i32 @test() {
6+
; CHECK-NEXT: [[ENTRY:.*]]:
7+
; CHECK-NEXT: br label %[[FUNC_135_EXIT_I:.*]]
8+
; CHECK: [[FUNC_135_EXIT_I]]:
9+
; CHECK-NEXT: [[G_228_PROMOTED166_I1105_I:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
10+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[G_228_PROMOTED166_I1105_I]], i32 0
11+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 poison>
12+
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], <i32 0, i32 0, i32 0, i32 poison>
13+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2>
14+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
15+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 2, i32 2, i32 2, i32 poison>
16+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
17+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> [[TMP5]], <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 poison, i32 28, i32 29, i32 30, i32 poison>
18+
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[G_228_PROMOTED166_I1105_I]], i32 7
19+
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 0, i32 15
20+
; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> [[TMP3]], i64 0)
21+
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP9]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 24, i32 25, i32 26, i32 7, i32 28, i32 29, i32 30, i32 31>
22+
; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt <16 x i32> [[TMP11]], zeroinitializer
23+
; CHECK-NEXT: [[TMP13:%.*]] = icmp ult <16 x i32> [[TMP11]], zeroinitializer
24+
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i1> [[TMP12]], <16 x i1> [[TMP13]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
25+
; CHECK-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> [[TMP14]])
26+
; CHECK-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i16
27+
; CHECK-NEXT: br label %[[IF_THEN_SINK_SPLIT_I:.*]]
28+
; CHECK: [[FOR_COND7_PREHEADER_I1132_1_I:.*]]:
29+
; CHECK-NEXT: br label %[[IF_THEN_SINK_SPLIT_I]]
30+
; CHECK: [[IF_THEN_SINK_SPLIT_I]]:
31+
; CHECK-NEXT: [[XOR58_5_I_I1203_3_3_SINK_I:%.*]] = phi i16 [ 0, %[[FOR_COND7_PREHEADER_I1132_1_I]] ], [ [[TMP16]], %[[FUNC_135_EXIT_I]] ]
32+
; CHECK-NEXT: ret i32 0
33+
;
34+
entry:
35+
br label %func_135.exit.i
36+
37+
func_135.exit.i:
38+
%g_228.promoted166.i1105.i = phi i32 [ 0, %entry ]
39+
%cmp55.i.i1199.i = icmp ugt i32 %g_228.promoted166.i1105.i, 0
40+
%cmp55.1.i.i1200.i = icmp ugt i32 %g_228.promoted166.i1105.i, 0
41+
%cmp55.2.i.i1201.i = icmp ugt i32 %g_228.promoted166.i1105.i, 0
42+
%cmp55.3.i.i1202.i = icmp ugt i32 %g_228.promoted166.i1105.i, 0
43+
%0 = xor i1 %cmp55.2.i.i1201.i, %cmp55.3.i.i1202.i
44+
%1 = xor i1 %cmp55.1.i.i1200.i, %0
45+
%2 = xor i1 %cmp55.i.i1199.i, %1
46+
%dec.i.i1204.i = add i32 %g_228.promoted166.i1105.i, 0
47+
%cmp55.i.i1199.1.i = icmp ugt i32 %dec.i.i1204.i, 0
48+
%cmp55.1.i.i1200.1.i = icmp ugt i32 %dec.i.i1204.i, 0
49+
%cmp55.2.i.i1201.1.i = icmp ugt i32 %dec.i.i1204.i, 0
50+
%cmp55.3.i.i1202.1.i = icmp ugt i32 %dec.i.i1204.i, 0
51+
%3 = xor i1 %cmp55.2.i.i1201.1.i, %cmp55.3.i.i1202.1.i
52+
%4 = xor i1 %cmp55.1.i.i1200.1.i, %3
53+
%5 = xor i1 %cmp55.i.i1199.1.i, %4
54+
%6 = xor i1 %2, %5
55+
%dec.i.i1204.1.i = add i32 %g_228.promoted166.i1105.i, 0
56+
%cmp55.i.i1199.2.i = icmp ugt i32 %dec.i.i1204.1.i, 0
57+
%cmp55.1.i.i1200.2.i = icmp ugt i32 %dec.i.i1204.1.i, 0
58+
%cmp55.2.i.i1201.2.i = icmp ugt i32 %dec.i.i1204.1.i, 0
59+
%cmp55.3.i.i1202.2.i = icmp ugt i32 %dec.i.i1204.1.i, 0
60+
%7 = add i32 0, 0
61+
%8 = icmp ult i32 %7, 0
62+
%9 = xor i1 %cmp55.3.i.i1202.2.i, %8
63+
%10 = xor i1 %cmp55.2.i.i1201.2.i, %9
64+
%11 = xor i1 %cmp55.1.i.i1200.2.i, %10
65+
%12 = xor i1 %cmp55.i.i1199.2.i, %11
66+
%13 = xor i1 %12, %6
67+
%dec.i.i1204.2.i = add i32 %g_228.promoted166.i1105.i, 0
68+
%cmp55.i.i1199.3.i = icmp ugt i32 %dec.i.i1204.2.i, 0
69+
%cmp55.1.i.i1200.3.i = icmp ugt i32 %dec.i.i1204.2.i, 0
70+
%cmp55.2.i.i1201.3.i = icmp ugt i32 %dec.i.i1204.2.i, 0
71+
%14 = xor i1 %cmp55.1.i.i1200.3.i, %cmp55.2.i.i1201.3.i
72+
%15 = xor i1 %cmp55.i.i1199.3.i, %14
73+
%16 = xor i1 %15, %13
74+
%17 = zext i1 %16 to i16
75+
br label %if.then.sink.split.i
76+
77+
for.cond7.preheader.i1132.1.i:
78+
br label %if.then.sink.split.i
79+
80+
if.then.sink.split.i:
81+
%xor58.5.i.i1203.3.3.sink.i = phi i16 [ 0, %for.cond7.preheader.i1132.1.i ], [ %17, %func_135.exit.i ]
82+
ret i32 0
83+
}

0 commit comments

Comments
 (0)