Skip to content

Commit 408dce8

Browse files
[SLP]Do not require external uses for roots and single use for other instructions in computeMinimumValueSizes. (#72679)
After the changes that no longer require support from InstCombine, we can drop some extra requirements for values-to-be-demoted. There is no need to check for external uses for roots/other instructions; just check that there is no non-vectorized insertelement instruction, which may require widening.
1 parent e6c2952 commit 408dce8

File tree

4 files changed

+35
-51
lines changed

4 files changed

+35
-51
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+7-8
Original file line numberDiff line numberDiff line change
@@ -13096,10 +13096,14 @@ bool BoUpSLP::collectValuesToDemote(
1309613096
if (isa<Constant>(V))
1309713097
return true;
1309813098

13099-
// If the value is not a vectorized instruction in the expression with only
13100-
// one use, it cannot be demoted.
13099+
// If the value is not a vectorized instruction in the expression and not used
13100+
// by the insertelement instruction and not used in multiple vector nodes, it
13101+
// cannot be demoted.
1310113102
auto *I = dyn_cast<Instruction>(V);
13102-
if (!I || !I->hasOneUse() || !getTreeEntry(I) || !Visited.insert(I).second)
13103+
if (!I || !getTreeEntry(I) || MultiNodeScalars.contains(I) ||
13104+
!Visited.insert(I).second || all_of(I->users(), [&](User *U) {
13105+
return isa<InsertElementInst>(U) && !getTreeEntry(U);
13106+
}))
1310313107
return false;
1310413108

1310513109
unsigned Start = 0;
@@ -13170,11 +13174,6 @@ bool BoUpSLP::collectValuesToDemote(
1317013174
}
1317113175

1317213176
void BoUpSLP::computeMinimumValueSizes() {
13173-
// If there are no external uses, the expression tree must be rooted by a
13174-
// store. We can't demote in-memory values, so there is nothing to do here.
13175-
if (ExternalUses.empty())
13176-
return;
13177-
1317813177
// We only attempt to truncate integer expressions.
1317913178
auto &TreeRoot = VectorizableTree[0]->Scalars;
1318013179
auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType());

llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll

+8-13
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,14 @@ define void @t(i64 %v) {
55
; CHECK-LABEL: define void @t(
66
; CHECK-SAME: i64 [[V:%.*]]) {
77
; CHECK-NEXT: entry:
8-
; CHECK-NEXT: [[CONV12_1_I:%.*]] = trunc i64 [[V]] to i32
9-
; CHECK-NEXT: [[MUL_I_1_I:%.*]] = mul i32 [[CONV12_1_I]], 2
10-
; CHECK-NEXT: [[CONV12_I:%.*]] = trunc i64 [[V]] to i32
11-
; CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[CONV12_I]], 3
12-
; CHECK-NEXT: [[CONV14104_I:%.*]] = or i32 [[MUL_I_1_I]], [[MUL_I_I]]
13-
; CHECK-NEXT: [[CONV12_1_I_1:%.*]] = trunc i64 [[V]] to i32
14-
; CHECK-NEXT: [[MUL_I_1_I_1:%.*]] = mul i32 [[CONV12_1_I_1]], 6
15-
; CHECK-NEXT: [[CONV12_I_1:%.*]] = trunc i64 [[V]] to i32
16-
; CHECK-NEXT: [[MUL_I_I_1:%.*]] = mul i32 [[CONV12_I_1]], 5
17-
; CHECK-NEXT: [[CONV14104_I_1:%.*]] = or i32 [[MUL_I_1_I_1]], [[MUL_I_I_1]]
18-
; CHECK-NEXT: [[TMP0:%.*]] = or i32 [[CONV14104_I]], [[CONV14104_I_1]]
19-
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 65535
20-
; CHECK-NEXT: store i32 [[TMP1]], ptr null, align 4
8+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i32 0
9+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <4 x i32> zeroinitializer
10+
; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i16>
11+
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i16> [[TMP2]], <i16 5, i16 6, i16 3, i16 2>
12+
; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP3]])
13+
; CHECK-NEXT: [[TMP5:%.*]] = sext i16 [[TMP4]] to i32
14+
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 65535
15+
; CHECK-NEXT: store i32 [[TMP6]], ptr null, align 4
2116
; CHECK-NEXT: ret void
2217
;
2318
entry:

llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll

+10-18
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,16 @@ define void @test(i64 %d.promoted.i) {
55
; CHECK-LABEL: define void @test(
66
; CHECK-SAME: i64 [[D_PROMOTED_I:%.*]]) {
77
; CHECK-NEXT: entry:
8-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[D_PROMOTED_I]], i32 0
9-
; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[TMP0]] to <2 x i1>
10-
; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> zeroinitializer, [[TMP1]]
11-
; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i1> [[TMP2]], zeroinitializer
12-
; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i1> [[TMP3]], zeroinitializer
13-
; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i1> [[TMP4]], zeroinitializer
14-
; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i1> [[TMP5]], zeroinitializer
15-
; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i1> [[TMP6]], zeroinitializer
16-
; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i1> [[TMP7]], zeroinitializer
17-
; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i1> [[TMP8]], zeroinitializer
18-
; CHECK-NEXT: [[TMP10:%.*]] = or <2 x i1> [[TMP9]], zeroinitializer
19-
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
20-
; CHECK-NEXT: [[TMP12:%.*]] = sext i1 [[TMP11]] to i32
21-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
22-
; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i32
23-
; CHECK-NEXT: [[TMP15:%.*]] = or i32 [[TMP12]], [[TMP14]]
24-
; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 0
25-
; CHECK-NEXT: store i32 [[TMP16]], ptr null, align 4
8+
; CHECK-NEXT: [[AND_1_I:%.*]] = and i64 0, [[D_PROMOTED_I]]
9+
; CHECK-NEXT: [[AND_1_I_1:%.*]] = and i64 0, 0
10+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i64> <i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[AND_1_I_1]], i32 1
11+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i64> [[TMP0]], i64 [[AND_1_I]], i32 9
12+
; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[TMP1]] to <16 x i1>
13+
; CHECK-NEXT: [[TMP3:%.*]] = mul <16 x i1> [[TMP2]], zeroinitializer
14+
; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
15+
; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32
16+
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 0
17+
; CHECK-NEXT: store i32 [[TMP6]], ptr null, align 4
2618
; CHECK-NEXT: ret void
2719
;
2820
entry:

llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll

+10-12
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,18 @@ define i1 @test() {
88
; CHECK: then:
99
; CHECK-NEXT: br label [[ELSE]]
1010
; CHECK: else:
11-
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i1> [ zeroinitializer, [[THEN]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
12-
; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[TMP0]] to <2 x i32>
13-
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
14-
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
15-
; CHECK-NEXT: [[BF_CAST162:%.*]] = and i32 [[TMP3]], 0
16-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> [[TMP1]], <2 x i32> <i32 3, i32 1>
17-
; CHECK-NEXT: [[T13:%.*]] = and <2 x i32> [[TMP4]], zeroinitializer
11+
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, [[THEN]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
12+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
13+
; CHECK-NEXT: [[BF_CAST162:%.*]] = and i32 [[TMP1]], 0
14+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> [[TMP0]], <2 x i32> <i32 3, i32 1>
15+
; CHECK-NEXT: [[T13:%.*]] = and <2 x i32> [[TMP2]], zeroinitializer
1816
; CHECK-NEXT: br label [[ELSE1:%.*]]
1917
; CHECK: else1:
20-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[T13]], <2 x i32> poison, <2 x i32> <i32 poison, i32 0>
21-
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[BF_CAST162]], i32 0
22-
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt <2 x i32> [[TMP6]], zeroinitializer
23-
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
24-
; CHECK-NEXT: ret i1 [[TMP8]]
18+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[T13]], <2 x i32> poison, <2 x i32> <i32 poison, i32 0>
19+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[BF_CAST162]], i32 0
20+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt <2 x i32> [[TMP4]], zeroinitializer
21+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
22+
; CHECK-NEXT: ret i1 [[TMP6]]
2523
;
2624
entry:
2725
br i1 false, label %then, label %else

0 commit comments

Comments
 (0)