Skip to content

Commit b65b2b4

Browse files
committed
[SLP]Expand vector to the whole register size in extracts adjustment
The number of elements must be expanded to the whole register size so that the cost estimation is processed correctly and the compiler does not crash. Fixes #113462
1 parent 3605d9a commit b65b2b4

File tree

2 files changed

+105
-5
lines changed

2 files changed

+105
-5
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9840,13 +9840,14 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
98409840
::getShuffleCost(TTI, *RegShuffleKind,
98419841
getWidenedType(ScalarTy, EltsPerVector), SubMask);
98429842
}
9843+
const unsigned BaseVF = getFullVectorNumberOfElements(
9844+
*R.TTI, VL.front()->getType(), alignTo(NumElts, EltsPerVector));
98439845
for (unsigned Idx : Indices) {
9844-
assert((Idx + EltsPerVector) <= alignTo(NumElts, EltsPerVector) &&
9846+
assert((Idx + EltsPerVector) <= BaseVF &&
98459847
"SK_ExtractSubvector index out of range");
9846-
Cost += ::getShuffleCost(
9847-
TTI, TTI::SK_ExtractSubvector,
9848-
getWidenedType(ScalarTy, alignTo(NumElts, EltsPerVector)), {},
9849-
CostKind, Idx, getWidenedType(ScalarTy, EltsPerVector));
9848+
Cost += ::getShuffleCost(TTI, TTI::SK_ExtractSubvector,
9849+
getWidenedType(ScalarTy, BaseVF), {}, CostKind,
9850+
Idx, getWidenedType(ScalarTy, EltsPerVector));
98509851
}
98519852
// Second attempt to check, if just a permute is better estimated than
98529853
// subvector extract.
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer < %s | FileCheck %s
3+
4+
; Regression test for issue #113462: running the SLP vectorizer over this
; function must complete without a compiler crash and produce the vectorized
; form recorded in the autogenerated CHECK lines below.
define i32 @test(i32 %v, ptr %p) {
5+
; CHECK-LABEL: define i32 @test(
6+
; CHECK-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*]]:
8+
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr [[P]], align 4
9+
; CHECK-NEXT: br i1 false, label %[[INC:.*]], label %[[PH:.*]]
10+
; CHECK: [[PH]]:
11+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0
12+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
13+
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
14+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 0, i32 0>, i32 [[V]], i32 13
15+
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <16 x i32> [[TMP3]], zeroinitializer
16+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
17+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i1> [[TMP5]], <16 x i1> [[TMP4]], <4 x i32> <i32 0, i32 31, i32 poison, i32 poison>
18+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i1> [[TMP6]], <4 x i1> <i1 poison, i1 poison, i1 false, i1 false>, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
19+
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> zeroinitializer, <4 x i64> zeroinitializer
20+
; CHECK-NEXT: [[I8_I_I:%.*]] = select i1 false, i64 0, i64 0
21+
; CHECK-NEXT: [[I9_I_I:%.*]] = select i1 false, i64 0, i64 0
22+
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP8]])
23+
; CHECK-NEXT: [[OP_RDX1:%.*]] = or i64 [[TMP9]], [[I8_I_I]]
24+
; CHECK-NEXT: [[OP_RDX2:%.*]] = or i64 [[OP_RDX1]], [[I9_I_I]]
25+
; CHECK-NEXT: [[TMP10:%.*]] = freeze <16 x i1> [[TMP4]]
26+
; CHECK-NEXT: [[TMP11:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP10]])
27+
; CHECK-NEXT: [[TMP12:%.*]] = freeze <4 x i1> [[TMP2]]
28+
; CHECK-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP12]])
29+
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP11]], i1 true, i1 [[TMP13]]
30+
; CHECK-NEXT: [[AND252_US_I_24_I_I:%.*]] = select i1 [[OP_RDX]], i32 0, i32 0
31+
; CHECK-NEXT: br label %[[INC]]
32+
; CHECK: [[INC]]:
33+
; CHECK-NEXT: [[P1:%.*]] = phi i32 [ [[AND252_US_I_24_I_I]], %[[PH]] ], [ 0, %[[ENTRY]] ]
34+
; CHECK-NEXT: [[P2:%.*]] = phi i64 [ [[OP_RDX2]], %[[PH]] ], [ 0, %[[ENTRY]] ]
35+
; CHECK-NEXT: ret i32 0
36+
;
37+
; Loads the scalar that feeds several compares in %ph. The branch condition is
; the constant false, so the false successor %ph is the one that is taken.
entry:
38+
%ld = load i32, ptr %p, align 4
39+
br i1 false, label %inc, label %ph
40+
41+
; Scalar input to the vectorizer: many `icmp eq ..., 0` results combined through
; an i1 or/select chain (ending in %18) plus an i64 `or` chain of selects
; (ending in %inc34.9.i.i). The CHECK lines above show both chains rewritten
; into vector compares and llvm.vector.reduce.or calls.
ph:
42+
%bi.i.not = icmp eq i32 %ld, 0
43+
%b1.i.i = icmp eq i32 %ld, 0
44+
%b3.i.i = icmp eq i32 %ld, 0
45+
%0 = or i1 %b3.i.i, %b1.i.i
46+
%b4.i.i = icmp eq i32 %ld, 0
47+
%i4.i.i = select i1 %b4.i.i, i64 0, i64 0
48+
%b5.i.i = icmp eq i32 0, 0
49+
%i5.i.i = select i1 %b5.i.i, i64 0, i64 0
50+
%inc34.5.i.i = or i64 %i4.i.i, %i5.i.i
51+
%1 = or i1 %b5.i.i, %b4.i.i
52+
%i6.i.i = select i1 false, i64 0, i64 0
53+
%inc34.6.i.i = or i64 %inc34.5.i.i, %i6.i.i
54+
%b7.i.i = icmp eq i32 0, 0
55+
%i7.i.i = select i1 false, i64 0, i64 0
56+
%inc34.7.i.i = or i64 %inc34.6.i.i, %i7.i.i
57+
%i8.i.i = select i1 false, i64 0, i64 0
58+
%inc34.8.i.i = or i64 %inc34.7.i.i, %i8.i.i
59+
%i9.i.i = select i1 false, i64 0, i64 0
60+
%inc34.9.i.i = or i64 %inc34.8.i.i, %i9.i.i
61+
%b10.i.i = icmp eq i32 0, 0
62+
%b11.i.i = icmp eq i32 0, 0
63+
%2 = or i1 %b11.i.i, %b10.i.i
64+
%b12.i.i = icmp eq i32 %v, 0
65+
%3 = or i1 %b12.i.i, %2
66+
%b13.i.i = icmp eq i32 0, 0
67+
%b14.i.i = icmp eq i32 0, 0
68+
%4 = or i1 %b14.i.i, %b13.i.i
69+
%b16.i.i = icmp eq i32 0, 0
70+
%b17.i.i = icmp eq i32 0, 0
71+
%5 = or i1 %b17.i.i, %b16.i.i
72+
%b18.i.i = icmp eq i32 0, 0
73+
%6 = or i1 %b18.i.i, %5
74+
%b19.i.i = icmp eq i32 0, 0
75+
%b20.i.i = icmp eq i32 0, 0
76+
%7 = or i1 %b20.i.i, %b19.i.i
77+
%b21.i.i = icmp eq i32 0, 0
78+
%8 = or i1 %b21.i.i, %7
79+
%b22.i.i = icmp eq i32 0, 0
80+
%b23.i.i = icmp eq i32 0, 0
81+
%9 = or i1 %b23.i.i, %b22.i.i
82+
%b24.i.i = icmp eq i32 0, 0
83+
%10 = or i1 %b24.i.i, %9
84+
%11 = select i1 %10, i1 true, i1 %8
85+
%12 = select i1 %11, i1 true, i1 %6
86+
%13 = select i1 %12, i1 true, i1 %4
87+
%14 = select i1 %13, i1 true, i1 %3
88+
%15 = select i1 %14, i1 true, i1 %b7.i.i
89+
%16 = select i1 %15, i1 true, i1 %1
90+
%17 = or i1 %0, %bi.i.not
91+
%18 = select i1 %16, i1 true, i1 %17
92+
%and252.us.i.24.i.i = select i1 %18, i32 0, i32 0
93+
br label %inc
94+
95+
; Merge block: the phis consume the final values of the i1/select chain and the
; i64 or-chain from %ph; the function itself always returns 0.
inc:
96+
%p1 = phi i32 [ %and252.us.i.24.i.i, %ph ], [ 0, %entry ]
97+
%p2 = phi i64 [ %inc34.9.i.i, %ph ], [ 0, %entry ]
98+
ret i32 0
99+
}

0 commit comments

Comments
 (0)