Skip to content

Commit 96bbd35

Browse files
committed
[AArch64][SVE] Only fold frame indexes referencing SVE objects into SVE loads/stores
Currently we always fold frame indexes into SVE load/store instructions, however these instructions can only encode VL-scaled offsets. This means that when we are accessing a fixed length stack object with these instructions, the folded-in frame index gets pulled back out during frame lowering. This can cause issues when we have no spare registers and no emergency spill slot. Rather than causing issues like this, don't fold in frame indexes that reference fixed length objects. Fixes: #55041 Differential Revision: https://reviews.llvm.org/D124457
1 parent f496a0e commit 96bbd35

File tree

3 files changed

+81
-4
lines changed

3 files changed

+81
-4
lines changed

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5092,12 +5092,19 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
50925092
SDValue &OffImm) {
50935093
const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
50945094
const DataLayout &DL = CurDAG->getDataLayout();
5095+
const MachineFrameInfo &MFI = MF->getFrameInfo();
50955096

50965097
if (N.getOpcode() == ISD::FrameIndex) {
50975098
int FI = cast<FrameIndexSDNode>(N)->getIndex();
5098-
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
5099-
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
5100-
return true;
5099+
// We can only encode VL scaled offsets, so only fold in frame indexes
5100+
// referencing SVE objects.
5101+
if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector) {
5102+
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
5103+
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
5104+
return true;
5105+
}
5106+
5107+
return false;
51015108
}
51025109

51035110
if (MemVT == EVT())
@@ -5124,7 +5131,10 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
51245131
Base = N.getOperand(0);
51255132
if (Base.getOpcode() == ISD::FrameIndex) {
51265133
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
5127-
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
5134+
// We can only encode VL scaled offsets, so only fold in frame indexes
5135+
// referencing SVE objects.
5136+
if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector)
5137+
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
51285138
}
51295139

51305140
OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
; RUN: llc < %s | FileCheck %s
2+
3+
target triple = "aarch64-unknown-linux-gnu"
4+
5+
; Ensure we don't crash by trying to fold fixed length frame indexes into
6+
; loads/stores that don't support an appropriate addressing mode, hence creating
7+
; too many extra vregs during frame lowering, when we don't have an emergency
8+
; spill slot.
9+
10+
define dso_local void @func1(i64* %v1, i64* %v2, i64* %v3, i64* %v4, i64* %v5, i64* %v6, i64* %v7, i64* %v8,
11+
i64* %v9, i64* %v10, i64* %v11, i64* %v12, i64* %v13, i64* %v14, i64* %v15, i64* %v16,
12+
i64* %v17, i64* %v18, i64* %v19, i64* %v20, i64* %v21, i64* %v22, i64* %v23, i64* %v24,
13+
i64* %v25, i64* %v26, i64* %v27, i64* %v28, i64* %v29, i64* %v30, i64* %v31, i64* %v32,
14+
i64* %v33, i64* %v34, i64* %v35, i64* %v36, i64* %v37, i64* %v38, i64* %v39, i64* %v40,
15+
i64* %v41, i64* %v42, i64* %v43, i64* %v44, i64* %v45, i64* %v46, i64* %v47, i64* %v48,
16+
i64 %v49) #0 {
17+
; CHECK-LABEL: func1
18+
tail call void @func2(i64* %v1, i64* %v2, i64* %v3, i64* %v4, i64* %v5, i64* %v6, i64* %v7, i64* %v8,
19+
i64* %v9, i64* %v10, i64* %v11, i64* %v12, i64* undef, i64* %v14, i64* %v15, i64* %v16,
20+
i64* %v17, i64* %v18, i64* %v19, i64* %v20, i64* %v21, i64* %v22, i64* %v23, i64* %v24,
21+
i64* %v25, i64* %v26, i64* %v27, i64* %v28, i64* %v29, i64* %v30, i64* undef, i64* undef,
22+
i64* undef, i64* undef, i64* undef, i64* undef, i64* %v37, i64* %v38, i64* %v39, i64* %v40,
23+
i64* %v41, i64* %v42, i64* %v43, i64* %v44, i64* %v45, i64* undef, i64* %v47, i64* %v48,
24+
i64 undef)
25+
ret void
26+
}
27+
28+
declare dso_local void @func2(i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
29+
i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
30+
i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
31+
i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
32+
i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
33+
i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
34+
i64)
35+
36+
attributes #0 = { "target-features"="+sve" vscale_range(2,2) }
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -debug-only=isel < %s 2>&1 | FileCheck %s
3+
4+
; REQUIRES: asserts
5+
6+
target triple = "aarch64-unknown-linux-gnu"
7+
8+
; Ensure that only no offset frame indexes are folded into SVE load/stores when
9+
; accessing fixed width objects.
10+
define void @foo(<8 x i64>* %a) #0 {
11+
; CHECK-LABEL: foo:
12+
; CHECK: SelectionDAG has 14 nodes:
13+
; CHECK-NEXT: t0: ch = EntryToken
14+
; CHECK-NEXT: t12: nxv2i1 = PTRUE_D TargetConstant:i32<31>
15+
; CHECK-NEXT: t2: i64,ch = CopyFromReg t0, Register:i64 %0
16+
; CHECK-NEXT: t18: nxv2i64,ch = LD1D_IMM<Mem:(volatile load (s512) from %ir.a)> t12, t2, TargetConstant:i64<0>, t0
17+
; CHECK-NEXT: t8: i64 = ADDXri TargetFrameIndex:i64<1>, TargetConstant:i32<0>, TargetConstant:i32<0>
18+
; CHECK-NEXT: t17: ch = ST1D_IMM<Mem:(volatile store (s512) into %ir.r0)> t18, t12, TargetFrameIndex:i64<0>, TargetConstant:i64<0>, t0
19+
; CHECK-NEXT: t16: ch = ST1D_IMM<Mem:(volatile store (s512) into %ir.r1)> t18, t12, t8, TargetConstant:i64<0>, t17
20+
; CHECK-NEXT: t10: ch = RET_ReallyLR t16
21+
; CHECK-EMPTY:
22+
entry:
23+
%r0 = alloca <8 x i64>
24+
%r1 = alloca <8 x i64>
25+
%r = load volatile <8 x i64>, <8 x i64>* %a
26+
store volatile <8 x i64> %r, <8 x i64>* %r0
27+
store volatile <8 x i64> %r, <8 x i64>* %r1
28+
ret void
29+
}
30+
31+
attributes #0 = { nounwind "target-features"="+sve" vscale_range(4,4) }

0 commit comments

Comments
 (0)