Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit 21c68fa

Browse files
author
Hal Finkel
committed
[PowerPC] Fix address-offset folding for plain addi
When folding an addi into a memory access that can take an immediate offset, we were implicitly assuming that the existing offset was zero. This was incorrect. If we're dealing with an addi with a plain constant, we can add it to the existing offset (assuming that doesn't overflow the immediate, etc.), but if we have anything else (i.e. something that will become a relocation expression), we'll go back to requiring the existing immediate offset to be zero (because we don't know what the requirements on that relocation expression might be - e.g. maybe it is paired with some addis in some relevant way). On the other hand, when dealing with a plain addi with a regular constant immediate, the alignment restrictions (from the TOC base pointer, etc.) are irrelevant. I've added the test case from PR30280, which demonstrated the bug, but also demonstrates a missed optimization opportunity (i.e. we don't need the memory accesses at all). Fixes PR30280. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@280789 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 7840500 commit 21c68fa

File tree

2 files changed

+78
-15
lines changed

2 files changed

+78
-15
lines changed

lib/Target/PowerPC/PPCISelDAGToDAG.cpp

+38-15
Original file line numberDiff line numberDiff line change
@@ -4385,25 +4385,48 @@ void PPCDAGToDAGISel::PeepholePPC64() {
43854385
SDValue HBase = Base.getOperand(0);
43864386

43874387
int Offset = N->getConstantOperandVal(FirstOp);
4388-
if (Offset < 0 || Offset > MaxDisplacement) {
4389-
// If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
4390-
// one use, then we can do this for any offset, we just need to also
4391-
// update the offset (i.e. the symbol addend) on the addis also.
4392-
if (Base.getMachineOpcode() != PPC::ADDItocL)
4393-
continue;
4388+
if (ReplaceFlags) {
4389+
if (Offset < 0 || Offset > MaxDisplacement) {
4390+
// If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
4391+
// one use, then we can do this for any offset, we just need to also
4392+
// update the offset (i.e. the symbol addend) on the addis also.
4393+
if (Base.getMachineOpcode() != PPC::ADDItocL)
4394+
continue;
43944395

4395-
if (!HBase.isMachineOpcode() ||
4396-
HBase.getMachineOpcode() != PPC::ADDIStocHA)
4397-
continue;
4396+
if (!HBase.isMachineOpcode() ||
4397+
HBase.getMachineOpcode() != PPC::ADDIStocHA)
4398+
continue;
43984399

4399-
if (!Base.hasOneUse() || !HBase.hasOneUse())
4400-
continue;
4400+
if (!Base.hasOneUse() || !HBase.hasOneUse())
4401+
continue;
44014402

4402-
SDValue HImmOpnd = HBase.getOperand(1);
4403-
if (HImmOpnd != ImmOpnd)
4404-
continue;
4403+
SDValue HImmOpnd = HBase.getOperand(1);
4404+
if (HImmOpnd != ImmOpnd)
4405+
continue;
44054406

4406-
UpdateHBase = true;
4407+
UpdateHBase = true;
4408+
}
4409+
} else {
4410+
// If we're directly folding the addend from an addi instruction, then:
4411+
// 1. In general, the offset on the memory access must be zero.
4412+
// 2. If the addend is a constant, then it can be combined with a
4413+
// non-zero offset, but only if the result meets the encoding
4414+
// requirements.
4415+
if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
4416+
Offset += C->getSExtValue();
4417+
4418+
if ((StorageOpcode == PPC::LWA || StorageOpcode == PPC::LD ||
4419+
StorageOpcode == PPC::STD) && (Offset % 4) != 0)
4420+
continue;
4421+
4422+
if (!isInt<16>(Offset))
4423+
continue;
4424+
4425+
ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
4426+
ImmOpnd.getValueType());
4427+
} else if (Offset != 0) {
4428+
continue;
4429+
}
44074430
}
44084431

44094432
// We found an opportunity. Reverse the operands from the add
+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
; RUN: llc < %s | FileCheck %s
2+
target datalayout = "e-m:e-i64:64-n32:64"
3+
target triple = "powerpc64le-unknown-linux-gnu"
4+
5+
%struct.S0 = type <{ i32, [5 x i8] }>
6+
7+
; Function Attrs: norecurse nounwind readnone
8+
define signext i32 @foo([2 x i64] %a.coerce) local_unnamed_addr #0 {
9+
entry:
10+
%a = alloca %struct.S0, align 8
11+
%a.coerce.fca.0.extract = extractvalue [2 x i64] %a.coerce, 0
12+
%a.coerce.fca.1.extract = extractvalue [2 x i64] %a.coerce, 1
13+
%a.0.a.0..sroa_cast = bitcast %struct.S0* %a to i64*
14+
store i64 %a.coerce.fca.0.extract, i64* %a.0.a.0..sroa_cast, align 8
15+
%tmp.sroa.2.0.extract.trunc = trunc i64 %a.coerce.fca.1.extract to i8
16+
%a.8.a.8..sroa_idx = getelementptr inbounds %struct.S0, %struct.S0* %a, i64 0, i32 1, i64 4
17+
store i8 %tmp.sroa.2.0.extract.trunc, i8* %a.8.a.8..sroa_idx, align 8
18+
%a.4.a.4..sroa_idx = getelementptr inbounds %struct.S0, %struct.S0* %a, i64 0, i32 1
19+
%a.4.a.4..sroa_cast = bitcast [5 x i8]* %a.4.a.4..sroa_idx to i40*
20+
%a.4.a.4.bf.load = load i40, i40* %a.4.a.4..sroa_cast, align 4
21+
%bf.lshr = lshr i40 %a.4.a.4.bf.load, 31
22+
%bf.lshr.tr = trunc i40 %bf.lshr to i32
23+
%bf.cast = and i32 %bf.lshr.tr, 127
24+
ret i32 %bf.cast
25+
26+
; CHECK-LABEL: @foo
27+
; FIXME: We don't need to do these stores/loads at all.
28+
; CHECK-DAG: std 3, -24(1)
29+
; CHECK-DAG: stb 4, -16(1)
30+
; CHECK: ori 2, 2, 0
31+
; CHECK-DAG: lbz [[REG1:[0-9]+]], -16(1)
32+
; CHECK-DAG: lwz [[REG2:[0-9]+]], -20(1)
33+
; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG1]], 32
34+
; CHECK-DAG: or [[REG4:[0-9]+]], [[REG2]], [[REG3]]
35+
; CHECK: rldicl 3, [[REG4]], 33, 57
36+
; CHECK: blr
37+
}
38+
39+
attributes #0 = { nounwind "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "target-cpu"="ppc64le" }
40+

0 commit comments

Comments
 (0)