Skip to content

Commit 100fd0c

Browse files
committed
[SLP] Fix a crash when trying to identify a one-source order
Need to check that the order index is not out of bounds when trying to detect that the reuse mask is a one-source mask with clusters; this fixes a compiler crash.
1 parent 82ce829 commit 100fd0c

File tree

2 files changed

+155
-2
lines changed

2 files changed

+155
-2
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5373,8 +5373,11 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
53735373
UndefCnt > Sz / 2)
53745374
return std::nullopt;
53755375
UsedVals.set(Val);
5376-
for (unsigned K = 0; K < NumParts; ++K)
5377-
ResOrder[Val + Sz * K] = I + K;
5376+
for (unsigned K = 0; K < NumParts; ++K) {
5377+
unsigned Idx = Val + Sz * K;
5378+
if (Idx < VF)
5379+
ResOrder[Idx] = I + K;
5380+
}
53785381
}
53795382
return std::move(ResOrder);
53805383
}
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-1000 < %s | FileCheck %s
3+
4+
define void @e(ptr %c, i64 %0) {
5+
; CHECK-LABEL: define void @e(
6+
; CHECK-SAME: ptr [[C:%.*]], i64 [[TMP0:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C]], align 8
9+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i8, ptr [[TMP1]], i64 96
10+
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr i8, ptr [[TMP1]], i64 112
11+
; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX1]], align 8
12+
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C]], align 8
13+
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x ptr>, ptr [[ARRAYIDX]], align 8
14+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP3]], i32 0
15+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x ptr> [[TMP5]], <2 x ptr> poison, <2 x i32> zeroinitializer
16+
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <6 x ptr> poison, ptr [[TMP2]], i32 2
17+
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <6 x ptr> [[TMP7]], ptr [[TMP1]], i32 3
18+
; CHECK-NEXT: [[TMP9:%.*]] = call <6 x ptr> @llvm.vector.insert.v6p0.v2p0(<6 x ptr> [[TMP8]], <2 x ptr> [[TMP4]], i64 0)
19+
; CHECK-NEXT: [[TMP10:%.*]] = call <6 x ptr> @llvm.vector.insert.v6p0.v2p0(<6 x ptr> [[TMP9]], <2 x ptr> [[TMP6]], i64 4)
20+
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint <6 x ptr> [[TMP10]] to <6 x i64>
21+
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <6 x i64> [[TMP11]], <6 x i64> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5>
22+
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <32 x i64> poison, i64 [[TMP0]], i32 0
23+
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <32 x i64> [[TMP13]], <32 x i64> poison, <32 x i32> zeroinitializer
24+
; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i64> [[TMP14]], [[TMP12]]
25+
; CHECK-NEXT: [[TMP16:%.*]] = icmp ult <32 x i64> [[TMP15]], <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
26+
; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP16]])
27+
; CHECK-NEXT: br i1 [[TMP17]], label %[[FOR_BODY:.*]], label %[[VECTOR_PH:.*]]
28+
; CHECK: [[VECTOR_PH]]:
29+
; CHECK-NEXT: ret void
30+
; CHECK: [[FOR_BODY]]:
31+
; CHECK-NEXT: ret void
32+
;
33+
entry:
34+
%1 = load ptr, ptr %c, align 8
35+
%arrayidx = getelementptr i8, ptr %1, i64 96
36+
%arrayidx1 = getelementptr i8, ptr %1, i64 112
37+
%2 = load ptr, ptr %arrayidx1, align 8
38+
%arrayidx5 = getelementptr i8, ptr %1, i64 104
39+
%3 = load ptr, ptr %arrayidx5, align 8
40+
%4 = load ptr, ptr %arrayidx, align 8
41+
%5 = load ptr, ptr %c, align 8
42+
%6 = ptrtoint ptr %5 to i64
43+
%7 = ptrtoint ptr %5 to i64
44+
%8 = ptrtoint ptr %1 to i64
45+
%9 = ptrtoint ptr %4 to i64
46+
%10 = ptrtoint ptr %3 to i64
47+
%11 = ptrtoint ptr %2 to i64
48+
%12 = or i64 %0, %11
49+
%dc64 = icmp ult i64 %12, 16
50+
%13 = or i64 %0, %11
51+
%dc65 = icmp ult i64 %13, 16
52+
%cr66 = or i1 %dc64, %dc65
53+
%14 = or i64 %0, %11
54+
%dc67 = icmp ult i64 %14, 16
55+
%cr68 = or i1 %cr66, %dc67
56+
%15 = or i64 %0, %11
57+
%dc69 = icmp ult i64 %15, 16
58+
%cr70 = or i1 %cr68, %dc69
59+
%16 = or i64 %0, %11
60+
%dc71 = icmp ult i64 %16, 16
61+
%cr72 = or i1 %cr70, %dc71
62+
%17 = or i64 %0, %11
63+
%dc73 = icmp ult i64 %17, 16
64+
%cr74 = or i1 %cr72, %dc73
65+
%18 = or i64 %0, %11
66+
%dc75 = icmp ult i64 %18, 16
67+
%cr76 = or i1 %cr74, %dc75
68+
%19 = or i64 %0, %10
69+
%dc77 = icmp ult i64 %19, 16
70+
%cr78 = or i1 %cr76, %dc77
71+
%20 = or i64 %0, %10
72+
%dc79 = icmp ult i64 %20, 16
73+
%cr80 = or i1 %cr78, %dc79
74+
%21 = or i64 %0, %10
75+
%dc81 = icmp ult i64 %21, 16
76+
%cr82 = or i1 %cr80, %dc81
77+
%22 = or i64 %0, %10
78+
%dc83 = icmp ult i64 %22, 16
79+
%cr84 = or i1 %cr82, %dc83
80+
%23 = or i64 %0, %10
81+
%dc85 = icmp ult i64 %23, 16
82+
%cr86 = or i1 %cr84, %dc85
83+
%24 = or i64 %0, %10
84+
%dc87 = icmp ult i64 %24, 16
85+
%cr88 = or i1 %cr86, %dc87
86+
%25 = or i64 %0, %10
87+
%dc89 = icmp ult i64 %25, 16
88+
%cr90 = or i1 %cr88, %dc89
89+
%26 = or i64 %0, %9
90+
%dc91 = icmp ult i64 %26, 16
91+
%cr92 = or i1 %cr90, %dc91
92+
%27 = or i64 %0, %9
93+
%dc93 = icmp ult i64 %27, 16
94+
%cr94 = or i1 %cr92, %dc93
95+
%28 = or i64 %0, %9
96+
%dc95 = icmp ult i64 %28, 16
97+
%cr96 = or i1 %cr94, %dc95
98+
%29 = or i64 %0, %9
99+
%dc97 = icmp ult i64 %29, 16
100+
%cr98 = or i1 %cr96, %dc97
101+
%30 = or i64 %0, %9
102+
%dc99 = icmp ult i64 %30, 16
103+
%cr100 = or i1 %cr98, %dc99
104+
%31 = or i64 %0, %9
105+
%dc101 = icmp ult i64 %31, 16
106+
%cr102 = or i1 %cr100, %dc101
107+
%32 = or i64 %0, %8
108+
%dc103 = icmp ult i64 %32, 16
109+
%cr104 = or i1 %cr102, %dc103
110+
%33 = or i64 %0, %8
111+
%dc105 = icmp ult i64 %33, 16
112+
%cr106 = or i1 %cr104, %dc105
113+
%34 = or i64 %0, %8
114+
%dc107 = icmp ult i64 %34, 16
115+
%cr108 = or i1 %cr106, %dc107
116+
%35 = or i64 %0, %8
117+
%dc109 = icmp ult i64 %35, 16
118+
%cr110 = or i1 %cr108, %dc109
119+
%36 = or i64 %0, %8
120+
%dc111 = icmp ult i64 %36, 16
121+
%cr112 = or i1 %cr110, %dc111
122+
%37 = or i64 %0, %7
123+
%dc113 = icmp ult i64 %37, 16
124+
%cr114 = or i1 %cr112, %dc113
125+
%38 = or i64 %0, %7
126+
%dc115 = icmp ult i64 %38, 16
127+
%cr116 = or i1 %cr114, %dc115
128+
%39 = or i64 %0, %7
129+
%dc117 = icmp ult i64 %39, 16
130+
%cr118 = or i1 %cr116, %dc117
131+
%40 = or i64 %0, %7
132+
%dc119 = icmp ult i64 %40, 16
133+
%cr120 = or i1 %cr118, %dc119
134+
%41 = or i64 %0, %6
135+
%dc121 = icmp ult i64 %41, 16
136+
%cr122 = or i1 %cr120, %dc121
137+
%42 = or i64 %0, %6
138+
%dc123 = icmp ult i64 %42, 16
139+
%cr124 = or i1 %cr122, %dc123
140+
%43 = or i64 %0, %6
141+
%dc125 = icmp ult i64 %43, 16
142+
%cr126 = or i1 %cr124, %dc125
143+
br i1 %cr126, label %for.body, label %vector.ph
144+
145+
vector.ph:
146+
ret void
147+
148+
for.body:
149+
ret void
150+
}

0 commit comments

Comments
 (0)