Skip to content

Commit 1de3f46

Browse files
committed
Revert "[SLP]Do not require external uses for roots and single use for other instructions in computeMinimumValueSizes. (#72679)"
This reverts commit 408dce8.

This triggered failed asserts with code like this:

    char a[];
    short *b;
    int c, d, e, f;
    void g() {
      char *h;
      for (;;) {
        for (; f; ++f) {
          h[f] = b[0] * a[e] + b[c] * a[1] >> 7;
          ++b;
        }
        h += d;
      }
    }

Compiled like this:

    $ clang -target x86_64-linux-gnu -c repro.c -O2
    clang: ../lib/IR/Instructions.cpp:3335: static llvm::CastInst* llvm::CastInst::Create(llvm::Instruction::CastOps, llvm::Value*, llvm::Type*, const llvm::Twine&, llvm::Instruction*): Assertion `castIsValid(op, S, Ty) && "Invalid cast!"' failed.
1 parent 1ee93ac commit 1de3f46

File tree

4 files changed

+51
-35
lines changed

4 files changed

+51
-35
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13096,14 +13096,10 @@ bool BoUpSLP::collectValuesToDemote(
1309613096
if (isa<Constant>(V))
1309713097
return true;
1309813098

13099-
// If the value is not a vectorized instruction in the expression and not used
13100-
// by the insertelement instruction and not used in multiple vector nodes, it
13101-
// cannot be demoted.
13099+
// If the value is not a vectorized instruction in the expression with only
13100+
// one use, it cannot be demoted.
1310213101
auto *I = dyn_cast<Instruction>(V);
13103-
if (!I || !getTreeEntry(I) || MultiNodeScalars.contains(I) ||
13104-
!Visited.insert(I).second || all_of(I->users(), [&](User *U) {
13105-
return isa<InsertElementInst>(U) && !getTreeEntry(U);
13106-
}))
13102+
if (!I || !I->hasOneUse() || !getTreeEntry(I) || !Visited.insert(I).second)
1310713103
return false;
1310813104

1310913105
unsigned Start = 0;
@@ -13174,6 +13170,11 @@ bool BoUpSLP::collectValuesToDemote(
1317413170
}
1317513171

1317613172
void BoUpSLP::computeMinimumValueSizes() {
13173+
// If there are no external uses, the expression tree must be rooted by a
13174+
// store. We can't demote in-memory values, so there is nothing to do here.
13175+
if (ExternalUses.empty())
13176+
return;
13177+
1317713178
// We only attempt to truncate integer expressions.
1317813179
auto &TreeRoot = VectorizableTree[0]->Scalars;
1317913180
auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType());

llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,19 @@ define void @t(i64 %v) {
55
; CHECK-LABEL: define void @t(
66
; CHECK-SAME: i64 [[V:%.*]]) {
77
; CHECK-NEXT: entry:
8-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i32 0
9-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <4 x i32> zeroinitializer
10-
; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i16>
11-
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i16> [[TMP2]], <i16 5, i16 6, i16 3, i16 2>
12-
; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP3]])
13-
; CHECK-NEXT: [[TMP5:%.*]] = sext i16 [[TMP4]] to i32
14-
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 65535
15-
; CHECK-NEXT: store i32 [[TMP6]], ptr null, align 4
8+
; CHECK-NEXT: [[CONV12_1_I:%.*]] = trunc i64 [[V]] to i32
9+
; CHECK-NEXT: [[MUL_I_1_I:%.*]] = mul i32 [[CONV12_1_I]], 2
10+
; CHECK-NEXT: [[CONV12_I:%.*]] = trunc i64 [[V]] to i32
11+
; CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[CONV12_I]], 3
12+
; CHECK-NEXT: [[CONV14104_I:%.*]] = or i32 [[MUL_I_1_I]], [[MUL_I_I]]
13+
; CHECK-NEXT: [[CONV12_1_I_1:%.*]] = trunc i64 [[V]] to i32
14+
; CHECK-NEXT: [[MUL_I_1_I_1:%.*]] = mul i32 [[CONV12_1_I_1]], 6
15+
; CHECK-NEXT: [[CONV12_I_1:%.*]] = trunc i64 [[V]] to i32
16+
; CHECK-NEXT: [[MUL_I_I_1:%.*]] = mul i32 [[CONV12_I_1]], 5
17+
; CHECK-NEXT: [[CONV14104_I_1:%.*]] = or i32 [[MUL_I_1_I_1]], [[MUL_I_I_1]]
18+
; CHECK-NEXT: [[TMP0:%.*]] = or i32 [[CONV14104_I]], [[CONV14104_I_1]]
19+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 65535
20+
; CHECK-NEXT: store i32 [[TMP1]], ptr null, align 4
1621
; CHECK-NEXT: ret void
1722
;
1823
entry:

llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,24 @@ define void @test(i64 %d.promoted.i) {
55
; CHECK-LABEL: define void @test(
66
; CHECK-SAME: i64 [[D_PROMOTED_I:%.*]]) {
77
; CHECK-NEXT: entry:
8-
; CHECK-NEXT: [[AND_1_I:%.*]] = and i64 0, [[D_PROMOTED_I]]
9-
; CHECK-NEXT: [[AND_1_I_1:%.*]] = and i64 0, 0
10-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i64> <i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[AND_1_I_1]], i32 1
11-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i64> [[TMP0]], i64 [[AND_1_I]], i32 9
12-
; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[TMP1]] to <16 x i1>
13-
; CHECK-NEXT: [[TMP3:%.*]] = mul <16 x i1> [[TMP2]], zeroinitializer
14-
; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
15-
; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32
16-
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 0
17-
; CHECK-NEXT: store i32 [[TMP6]], ptr null, align 4
8+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[D_PROMOTED_I]], i32 0
9+
; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[TMP0]] to <2 x i1>
10+
; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> zeroinitializer, [[TMP1]]
11+
; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i1> [[TMP2]], zeroinitializer
12+
; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i1> [[TMP3]], zeroinitializer
13+
; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i1> [[TMP4]], zeroinitializer
14+
; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i1> [[TMP5]], zeroinitializer
15+
; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i1> [[TMP6]], zeroinitializer
16+
; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i1> [[TMP7]], zeroinitializer
17+
; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i1> [[TMP8]], zeroinitializer
18+
; CHECK-NEXT: [[TMP10:%.*]] = or <2 x i1> [[TMP9]], zeroinitializer
19+
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
20+
; CHECK-NEXT: [[TMP12:%.*]] = sext i1 [[TMP11]] to i32
21+
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
22+
; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i32
23+
; CHECK-NEXT: [[TMP15:%.*]] = or i32 [[TMP12]], [[TMP14]]
24+
; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 0
25+
; CHECK-NEXT: store i32 [[TMP16]], ptr null, align 4
1826
; CHECK-NEXT: ret void
1927
;
2028
entry:

llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,20 @@ define i1 @test() {
88
; CHECK: then:
99
; CHECK-NEXT: br label [[ELSE]]
1010
; CHECK: else:
11-
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, [[THEN]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
12-
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
13-
; CHECK-NEXT: [[BF_CAST162:%.*]] = and i32 [[TMP1]], 0
14-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> [[TMP0]], <2 x i32> <i32 3, i32 1>
15-
; CHECK-NEXT: [[T13:%.*]] = and <2 x i32> [[TMP2]], zeroinitializer
11+
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i1> [ zeroinitializer, [[THEN]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
12+
; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[TMP0]] to <2 x i32>
13+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
14+
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
15+
; CHECK-NEXT: [[BF_CAST162:%.*]] = and i32 [[TMP3]], 0
16+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> [[TMP1]], <2 x i32> <i32 3, i32 1>
17+
; CHECK-NEXT: [[T13:%.*]] = and <2 x i32> [[TMP4]], zeroinitializer
1618
; CHECK-NEXT: br label [[ELSE1:%.*]]
1719
; CHECK: else1:
18-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[T13]], <2 x i32> poison, <2 x i32> <i32 poison, i32 0>
19-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[BF_CAST162]], i32 0
20-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt <2 x i32> [[TMP4]], zeroinitializer
21-
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
22-
; CHECK-NEXT: ret i1 [[TMP6]]
20+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[T13]], <2 x i32> poison, <2 x i32> <i32 poison, i32 0>
21+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[BF_CAST162]], i32 0
22+
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt <2 x i32> [[TMP6]], zeroinitializer
23+
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
24+
; CHECK-NEXT: ret i1 [[TMP8]]
2325
;
2426
entry:
2527
br i1 false, label %then, label %else

0 commit comments

Comments
 (0)