Skip to content

Commit 529ad40

Browse files
authored
[PowerPC] Fix missing kill flag update for XVCVDPSP transformations (#67997)
Add transformed register to kill flag work list for XVCVDPSP transformations. Ref: reviews.llvm.org/D133103
1 parent e18dca2 commit 529ad40

File tree

2 files changed

+47
-1
lines changed

2 files changed

+47
-1
lines changed

llvm/lib/Target/PowerPC/PPCMIPeephole.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -895,8 +895,9 @@ bool PPCMIPeephole::simplifyCode() {
895895
LLVM_DEBUG(MI.dump());
896896
LLVM_DEBUG(dbgs() << "Through instruction:\n");
897897
LLVM_DEBUG(DefMI->dump());
898-
RoundInstr->eraseFromParent();
899898
addRegToUpdate(ConvReg1);
899+
addRegToUpdate(FRSPDefines);
900+
ToErase = RoundInstr;
900901
}
901902
};
902903

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; RUN: llc < %s -mtriple powerpc64le-unknown-linux-gnu
2+
; RUN: llc < %s -mtriple powerpc64-unknown-linux-gnu
3+
; RUN: llc < %s -mtriple powerpc-ibm-aix
4+
; RUN: llc < %s -mtriple powerpc64-ibm-aix
5+
6+
define void @xvcvdpsp_kill_flag() {
7+
entry:
8+
%call49 = tail call double @sin()
9+
%0 = insertelement <2 x double> poison, double %call49, i64 1
10+
%1 = fmul <2 x double> %0, zeroinitializer
11+
%2 = shufflevector <2 x double> %1, <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
12+
%3 = insertelement <4 x double> %2, double 0.000000e+00, i64 2
13+
%4 = insertelement <4 x double> %3, double poison, i64 3
14+
%5 = fptrunc <4 x double> %4 to <4 x float>
15+
%6 = shufflevector <4 x float> %5, <4 x float> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
16+
%7 = shufflevector <4 x float> %5, <4 x float> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
17+
%8 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %7, <4 x float> <float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00>, <4 x float> zeroinitializer)
18+
br label %if.end1
19+
20+
if.end1: ; preds = %entry
21+
br i1 poison, label %for.cond1.preheader, label %if.then2
22+
23+
for.cond1.preheader: ; preds = %if.end1
24+
br label %for.body2.preheader
25+
26+
for.body2.preheader: ; preds = %for.cond1.preheader
27+
br i1 poison, label %for.loopexit, label %for.body3
28+
29+
for.body3: ; preds = %for.body2.preheader
30+
%9 = tail call <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float> zeroinitializer, <4 x float> %6, <4 x float> zeroinitializer)
31+
%10 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %8, <4 x float> %9)
32+
%11 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %10, <4 x float> zeroinitializer, <4 x float> zeroinitializer)
33+
store <4 x float> %11, ptr poison, align 16
34+
unreachable
35+
36+
for.loopexit: ; preds = %for.body2.preheader
37+
unreachable
38+
39+
if.then2: ; preds = %if.end1
40+
ret void
41+
}
42+
43+
declare double @sin() local_unnamed_addr #0
44+
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
45+
declare <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float>, <4 x float>, <4 x float>)

0 commit comments

Comments
 (0)