Skip to content

Commit f70f122

Browse files
committed
[SLP] Fix PR101213: Reuse an extractelement only if its vector operand comes before the new vector value.
When trying to reuse an extractelement instruction, we need to check that it is inserted at the proper position. Its original vector operand should come before the new vector value; otherwise, a new extractelement instruction must be generated. Fixes #101213
1 parent 8364a6e commit f70f122

File tree

2 files changed

+56
-2
lines changed

2 files changed

+56
-2
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13909,11 +13909,16 @@ Value *BoUpSLP::vectorizeTree(
1390913909
}
1391013910
if (!Ex) {
1391113911
// "Reuse" the existing extract to improve final codegen.
13912-
if (auto *ES = dyn_cast<ExtractElementInst>(Scalar)) {
13912+
if (auto *ES = dyn_cast<ExtractElementInst>(Scalar);
13913+
ES && isa<Instruction>(Vec)) {
1391313914
Value *V = ES->getVectorOperand();
1391413915
if (const TreeEntry *ETE = getTreeEntry(V))
1391513916
V = ETE->VectorizedValue;
13916-
Ex = Builder.CreateExtractElement(V, ES->getIndexOperand());
13917+
if (auto *IV = dyn_cast<Instruction>(V);
13918+
!IV || IV == Vec || IV->comesBefore(cast<Instruction>(Vec)))
13919+
Ex = Builder.CreateExtractElement(V, ES->getIndexOperand());
13920+
else
13921+
Ex = Builder.CreateExtractElement(Vec, Lane);
1391713922
} else if (ReplaceGEP) {
1391813923
// Leave the GEPs as is, they are free in most cases and better to
1391913924
// keep them as GEPs.
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-99999 < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
3+
4+
define void @test() {
5+
; CHECK-LABEL: define void @test() {
6+
; CHECK-NEXT: [[BB:.*]]:
7+
; CHECK-NEXT: br label %[[BB43:.*]]
8+
; CHECK: [[BB20:.*]]:
9+
; CHECK-NEXT: br label %[[BB105:.*]]
10+
; CHECK: [[BB43]]:
11+
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x ptr addrspace(1)> [ [[TMP1:%.*]], %[[BB51:.*]] ], [ zeroinitializer, %[[BB]] ]
12+
; CHECK-NEXT: br i1 false, label %[[BB105]], label %[[BB51]]
13+
; CHECK: [[BB51]]:
14+
; CHECK-NEXT: [[TMP1]] = phi <2 x ptr addrspace(1)> [ poison, %[[BB54:.*]] ], [ zeroinitializer, %[[BB43]] ]
15+
; CHECK-NEXT: br label %[[BB43]]
16+
; CHECK: [[BB54]]:
17+
; CHECK-NEXT: br label %[[BB51]]
18+
; CHECK: [[BB105]]:
19+
; CHECK-NEXT: [[PHI106:%.*]] = phi ptr addrspace(1) [ null, %[[BB20]] ], [ null, %[[BB43]] ]
20+
; CHECK-NEXT: ret void
21+
;
22+
bb:
23+
%0 = shufflevector <2 x ptr addrspace(1)> zeroinitializer, <2 x ptr addrspace(1)> zeroinitializer, <2 x i32> <i32 1, i32 0>
24+
%1 = extractelement <2 x ptr addrspace(1)> %0, i32 0
25+
%2 = extractelement <2 x ptr addrspace(1)> %0, i32 1
26+
br label %bb43
27+
28+
bb20:
29+
br label %bb105
30+
31+
bb43:
32+
%phi441 = phi ptr addrspace(1) [ %4, %bb51 ], [ %2, %bb ]
33+
%phi452 = phi ptr addrspace(1) [ %5, %bb51 ], [ %1, %bb ]
34+
br i1 false, label %bb105, label %bb51
35+
36+
bb51:
37+
%3 = phi <2 x ptr addrspace(1)> [ poison, %bb54 ], [ zeroinitializer, %bb43 ]
38+
%4 = extractelement <2 x ptr addrspace(1)> %3, i32 0
39+
%5 = extractelement <2 x ptr addrspace(1)> %3, i32 1
40+
br label %bb43
41+
42+
bb54:
43+
br label %bb51
44+
45+
bb105:
46+
%phi106 = phi ptr addrspace(1) [ %1, %bb20 ], [ null, %bb43 ]
47+
ret void
48+
}
49+

0 commit comments

Comments
 (0)