Skip to content

Commit d79fdb2

Browse files
committed
[SLP]Fix PR78236: correctly track external values replaced several
times during reduction vectorization. If an external value was replaced by the vectorizer several times during reduction vectorization, we need to find the original value in order to handle external uses correctly and emit extractelement instructions properly.
1 parent 279dfe7 commit d79fdb2

File tree

2 files changed

+75
-1
lines changed

2 files changed

+75
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14794,8 +14794,17 @@ class HorizontalReduction {
1479414794
LocalExternallyUsedValues[RdxVal];
1479514795
// Update LocalExternallyUsedValues for the scalar, replaced by
1479614796
// extractelement instructions.
14797+
DenseMap<Value *, Value *> ReplacementToExternal;
14798+
for (const std::pair<Value *, Value *> &Pair : ReplacedExternals)
14799+
ReplacementToExternal.try_emplace(Pair.second, Pair.first);
1479714800
for (const std::pair<Value *, Value *> &Pair : ReplacedExternals) {
14798-
auto *It = ExternallyUsedValues.find(Pair.first);
14801+
Value *Ext = Pair.first;
14802+
auto RIt = ReplacementToExternal.find(Ext);
14803+
while (RIt != ReplacementToExternal.end()) {
14804+
Ext = RIt->second;
14805+
RIt = ReplacementToExternal.find(Ext);
14806+
}
14807+
auto *It = ExternallyUsedValues.find(Ext);
1479914808
if (It == ExternallyUsedValues.end())
1480014809
continue;
1480114810
LocalExternallyUsedValues[Pair.second].append(It->second);
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -S -slp-threshold=-14 < %s | FileCheck %s
3+
4+
; Regression test accompanying the PR78236 fix: four scalar xors feed twelve
; phis in %exit, and all phis are combined by a single chain of scalar 'or'
; instructions whose result is stored to %p. %xor.4.i is used both as a phi
; incoming value and directly as the first operand of the 'or' chain, so the
; expected output extracts lane 3 of the vectorized xor and ORs it into the
; final reduction result.
define void @test(i32 %0, ptr %p) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: i32 [[TMP0:%.*]], ptr [[P:%.*]]) {
7+
; CHECK-NEXT: entry:
8+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 0, i32 1, i32 0, i32 poison>, i32 [[TMP0]], i32 3
9+
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 1, i32 0, i32 1, i32 0>
10+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
11+
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[PH:%.*]]
12+
; CHECK: ph:
13+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[TMP0]], i32 0
14+
; CHECK-NEXT: br label [[EXIT]]
15+
; CHECK: exit:
16+
; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ [[TMP2]], [[ENTRY:%.*]] ], [ zeroinitializer, [[PH]] ]
17+
; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i32> [ [[TMP2]], [[ENTRY]] ], [ [[TMP4]], [[PH]] ]
18+
; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ [[TMP2]], [[ENTRY]] ], [ zeroinitializer, [[PH]] ]
19+
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
20+
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP6]])
21+
; CHECK-NEXT: [[OP_RDX:%.*]] = or i32 [[TMP8]], [[TMP9]]
22+
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP7]])
23+
; CHECK-NEXT: [[OP_RDX1:%.*]] = or i32 [[OP_RDX]], [[TMP10]]
24+
; CHECK-NEXT: [[OP_RDX2:%.*]] = or i32 [[OP_RDX1]], [[TMP3]]
25+
; CHECK-NEXT: store i32 [[OP_RDX2]], ptr [[P]], align 4
26+
; CHECK-NEXT: ret void
27+
;
28+
entry:
29+
%xor.1.i = xor i32 1, 0
30+
%xor.2.i = xor i32 1, 0
31+
%xor.3.i = xor i32 1, 0
32+
%xor.4.i = xor i32 %0, 0
33+
br i1 false, label %exit, label %ph
34+
35+
ph:
36+
br label %exit
37+
38+
exit:
39+
%p1 = phi i32 [ %xor.1.i, %entry ], [ 0, %ph ]
40+
%p2 = phi i32 [ %xor.2.i, %entry ], [ 0, %ph ]
41+
%p3 = phi i32 [ %xor.3.i, %entry ], [ 0, %ph ]
42+
%p4 = phi i32 [ %xor.4.i, %entry ], [ 0, %ph ]
43+
%p5 = phi i32 [ %xor.1.i, %entry ], [ %0, %ph ]
44+
%p6 = phi i32 [ %xor.2.i, %entry ], [ 0, %ph ]
45+
%p7 = phi i32 [ %xor.3.i, %entry ], [ 0, %ph ]
46+
%p8 = phi i32 [ %xor.4.i, %entry ], [ 0, %ph ]
47+
%p9 = phi i32 [ %xor.4.i, %entry ], [ 0, %ph ]
48+
%p10 = phi i32 [ %xor.3.i, %entry ], [ 0, %ph ]
49+
%p11 = phi i32 [ %xor.2.i, %entry ], [ 0, %ph ]
50+
%p12 = phi i32 [ %xor.1.i, %entry ], [ 0, %ph ]
51+
; The 'or' chain starts from the scalar %xor.4.i itself and then folds in each phi in turn.
%or.1.1.i = or i32 %xor.4.i, %p1
52+
%or.2.1.i = or i32 %or.1.1.i, %p2
53+
%or.3.1.i = or i32 %or.2.1.i, %p3
54+
%or.4.1.i = or i32 %or.3.1.i, %p4
55+
%or.1.2.i = or i32 %or.4.1.i, %p5
56+
%or.2.2.i = or i32 %or.1.2.i, %p6
57+
%or.3.2.i = or i32 %or.2.2.i, %p7
58+
%or.4.2.i = or i32 %or.3.2.i, %p8
59+
%or.326.i = or i32 %or.4.2.i, %p9
60+
%or.1.3.i = or i32 %or.326.i, %p10
61+
%or.2.3.i = or i32 %or.1.3.i, %p11
62+
%or.3.3.i = or i32 %or.2.3.i, %p12
63+
store i32 %or.3.3.i, ptr %p, align 4
64+
ret void
65+
}

0 commit comments

Comments
 (0)