Skip to content
This repository was archived by the owner on Sep 2, 2018. It is now read-only.

Commit e2e26b4

Browse files
author
Chad Rosier
committed
[AArch64] Remove an overly conservative check when generating store pairs.
Store instructions do not modify register values and therefore it's safe to form a store pair even if the source register has been read in between the two store instructions.

Previously, the read of w1 (see below) prevented the formation of a stp.

    str w0, [x2]
    ldr w8, [x2, #8]
    add w0, w8, w1
    str w1, [x2, #4]
    ret

We now generate the following code.

    stp w0, w1, [x2]
    ldr w8, [x2, #8]
    add w0, w8, w1
    ret

All correctness tests with -Ofast on A57 with Spec200x and EEMBC pass. Performance results for SPEC2K were within noise.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@239432 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent a96fc7a commit e2e26b4

File tree

2 files changed

+35
-2
lines changed

2 files changed

+35
-2
lines changed

lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -623,7 +623,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
623623
// and first alias with the second, we can combine the second into the
624624
// first.
625625
if (!ModifiedRegs[MI->getOperand(0).getReg()] &&
626-
!UsedRegs[MI->getOperand(0).getReg()] &&
626+
!(MI->mayLoad() && UsedRegs[MI->getOperand(0).getReg()]) &&
627627
!mayAlias(MI, MemInsns, TII)) {
628628
MergeForward = false;
629629
return MBBI;
@@ -634,7 +634,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
634634
// first and the second alias with the first, we can combine the first
635635
// into the second.
636636
if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] &&
637-
!UsedRegs[FirstMI->getOperand(0).getReg()] &&
637+
!(FirstMI->mayLoad() &&
638+
UsedRegs[FirstMI->getOperand(0).getReg()]) &&
638639
!mayAlias(FirstMI, MemInsns, TII)) {
639640
MergeForward = true;
640641
return MBBI;

test/CodeGen/AArch64/arm64-stp.ll

+32
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,35 @@ entry:
9999
store <4 x i32> %p20, <4 x i32>* %p21, align 4
100100
ret void
101101
}
102+
103+
; Read of %b to compute %tmp2 shouldn't prevent formation of stp
104+
; CHECK-LABEL: stp_int_rar_hazard
105+
; CHECK: stp w0, w1, [x2]
106+
; CHECK: ldr [[REG:w[0-9]+]], [x2, #8]
107+
; CHECK: add w0, [[REG]], w1
108+
; CHECK: ret
109+
define i32 @stp_int_rar_hazard(i32 %a, i32 %b, i32* nocapture %p) nounwind {
110+
store i32 %a, i32* %p, align 4
111+
%ld.ptr = getelementptr inbounds i32, i32* %p, i64 2
112+
%tmp = load i32, i32* %ld.ptr, align 4
113+
%tmp2 = add i32 %tmp, %b
114+
%add.ptr = getelementptr inbounds i32, i32* %p, i64 1
115+
store i32 %b, i32* %add.ptr, align 4
116+
ret i32 %tmp2
117+
}
118+
119+
; Read of %b to compute %tmp2 shouldn't prevent formation of stp
120+
; CHECK-LABEL: stp_int_rar_hazard_after
121+
; CHECK: ldr [[REG:w[0-9]+]], [x3, #4]
122+
; CHECK: add w0, [[REG]], w2
123+
; CHECK: stp w1, w2, [x3]
124+
; CHECK: ret
125+
define i32 @stp_int_rar_hazard_after(i32 %w0, i32 %a, i32 %b, i32* nocapture %p) nounwind {
126+
store i32 %a, i32* %p, align 4
127+
%ld.ptr = getelementptr inbounds i32, i32* %p, i64 1
128+
%tmp = load i32, i32* %ld.ptr, align 4
129+
%tmp2 = add i32 %tmp, %b
130+
%add.ptr = getelementptr inbounds i32, i32* %p, i64 1
131+
store i32 %b, i32* %add.ptr, align 4
132+
ret i32 %tmp2
133+
}

0 commit comments

Comments
 (0)