
Commit ce65262

aemerson authored and cuviper committed
[AArch64][GlobalISel] Fix incorrect handling of fp truncating stores.
When the tablegen patterns fail to select a truncating scalar FPR store, our manual selection code also failed to handle it, silently trying to generate an invalid copy. Fix this by adding support in the manual code to generate a proper subreg copy before selecting a non-truncating store.
(cherry-picked from 04fb9b7)
1 parent 2abffbf commit ce65262
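For illustration, a minimal sketch based on the truncating_f32 MIR test added below (the virtual register %2 in the selected output is a hypothetical name; the test matches it with a FileCheck pattern). A store that truncates an s64 FPR value down to 32 bits:

    %0:fpr(s64) = COPY $d0
    %1:gpr(p0) = G_FRAME_INDEX %stack.0.alloca
    G_STORE %0(s64), %1(p0) :: (store (s32) into %ir.alloca)

is now selected into a subreg copy of the low 32 bits followed by an ordinary non-truncating store:

    %2:fpr32 = COPY %0.ssub
    STRSui %2, %stack.0.alloca, 0 :: (store (s32) into %ir.alloca)

Previously this path tried to emit a single COPY between differently sized register classes, producing an invalid copy.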

3 files changed: +151 −15 lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

+31 −11
@@ -22,6 +22,7 @@
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "MCTargetDesc/AArch64MCTargetDesc.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -2701,29 +2702,29 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
   case TargetOpcode::G_ZEXTLOAD:
   case TargetOpcode::G_LOAD:
   case TargetOpcode::G_STORE: {
+    GLoadStore &LdSt = cast<GLoadStore>(I);
     bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
-    LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
+    LLT PtrTy = MRI.getType(LdSt.getPointerReg());
 
     if (PtrTy != LLT::pointer(0, 64)) {
       LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
                         << ", expected: " << LLT::pointer(0, 64) << '\n');
       return false;
     }
 
-    auto &MemOp = **I.memoperands_begin();
-    uint64_t MemSizeInBytes = MemOp.getSize();
-    unsigned MemSizeInBits = MemSizeInBytes * 8;
-    AtomicOrdering Order = MemOp.getSuccessOrdering();
+    uint64_t MemSizeInBytes = LdSt.getMemSize();
+    unsigned MemSizeInBits = LdSt.getMemSizeInBits();
+    AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
 
     // Need special instructions for atomics that affect ordering.
     if (Order != AtomicOrdering::NotAtomic &&
         Order != AtomicOrdering::Unordered &&
        Order != AtomicOrdering::Monotonic) {
-      assert(I.getOpcode() != TargetOpcode::G_ZEXTLOAD);
+      assert(!isa<GZExtLoad>(LdSt));
       if (MemSizeInBytes > 64)
         return false;
 
-      if (I.getOpcode() == TargetOpcode::G_LOAD) {
+      if (isa<GLoad>(LdSt)) {
        static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
                                     AArch64::LDARW, AArch64::LDARX};
        I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
@@ -2737,7 +2738,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     }
 
 #ifndef NDEBUG
-    const Register PtrReg = I.getOperand(1).getReg();
+    const Register PtrReg = LdSt.getPointerReg();
     const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
     // Sanity-check the pointer register.
     assert(PtrRB.getID() == AArch64::GPRRegBankID &&
@@ -2746,13 +2747,31 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
            "Load/Store pointer operand isn't a pointer");
 #endif
 
-    const Register ValReg = I.getOperand(0).getReg();
+    const Register ValReg = LdSt.getReg(0);
+    const LLT ValTy = MRI.getType(ValReg);
     const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
 
+    // The code below doesn't support truncating stores, so we need to split it
+    // again.
+    if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
+      unsigned SubReg;
+      LLT MemTy = LdSt.getMMO().getMemoryType();
+      auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
+      if (!getSubRegForClass(RC, TRI, SubReg))
+        return false;
+
+      // Generate a subreg copy.
+      auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
+                      .addReg(ValReg, 0, SubReg)
+                      .getReg(0);
+      RBI.constrainGenericRegister(Copy, *RC, MRI);
+      LdSt.getOperand(0).setReg(Copy);
+    }
+
     // Helper lambda for partially selecting I. Either returns the original
     // instruction with an updated opcode, or a new instruction.
     auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
-      bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
+      bool IsStore = isa<GStore>(I);
       const unsigned NewOpc =
           selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
       if (NewOpc == I.getOpcode())
@@ -2769,7 +2788,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
 
       // Folded something. Create a new instruction and return it.
       auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
-      IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
+      Register CurValReg = I.getOperand(0).getReg();
+      IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
       NewInst.cloneMemRefs(I);
       for (auto &Fn : *AddrModeFns)
         Fn(NewInst);
@@ -0,0 +1,116 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+--- |
+  define void @truncating_f32(double %x) {
+    %alloca = alloca i32, align 4
+    %bitcast = bitcast double %x to i64
+    %trunc = trunc i64 %bitcast to i32
+    store i32 %trunc, i32* %alloca, align 4
+    ret void
+  }
+
+  define void @truncating_f16(double %x) {
+    %alloca = alloca i16, align 2
+    %bitcast = bitcast double %x to i64
+    %trunc = trunc i64 %bitcast to i16
+    store i16 %trunc, i16* %alloca, align 2
+    ret void
+  }
+
+  define void @truncating_f8(double %x) {
+    %alloca = alloca i8, align 1
+    %bitcast = bitcast double %x to i64
+    %trunc = trunc i64 %bitcast to i8
+    store i8 %trunc, i8* %alloca, align 1
+    ret void
+  }
+
+...
+---
+name: truncating_f32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$d0' }
+frameInfo:
+  maxAlignment: 4
+stack:
+  - { id: 0, name: alloca, size: 4, alignment: 4 }
+machineFunctionInfo: {}
+body: |
+  bb.1 (%ir-block.0):
+    liveins: $d0
+
+    ; CHECK-LABEL: name: truncating_f32
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]].ssub
+    ; CHECK: STRSui [[COPY1]], %stack.0.alloca, 0 :: (store (s32) into %ir.alloca)
+    ; CHECK: RET_ReallyLR
+    %0:fpr(s64) = COPY $d0
+    %1:gpr(p0) = G_FRAME_INDEX %stack.0.alloca
+    G_STORE %0(s64), %1(p0) :: (store (s32) into %ir.alloca)
+    RET_ReallyLR
+
+...
+---
+name: truncating_f16
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$d0' }
+frameInfo:
+  maxAlignment: 2
+stack:
+  - { id: 0, name: alloca, size: 2, alignment: 2 }
+machineFunctionInfo: {}
+body: |
+  bb.1 (%ir-block.0):
+    liveins: $d0
+
+    ; CHECK-LABEL: name: truncating_f16
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY [[COPY]].hsub
+    ; CHECK: STRHui [[COPY1]], %stack.0.alloca, 0 :: (store (s16) into %ir.alloca)
+    ; CHECK: RET_ReallyLR
+    %0:fpr(s64) = COPY $d0
+    %1:gpr(p0) = G_FRAME_INDEX %stack.0.alloca
+    G_STORE %0(s64), %1(p0) :: (store (s16) into %ir.alloca)
+    RET_ReallyLR
+
+...
+---
+name: truncating_f8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$d0' }
+frameInfo:
+  maxAlignment: 1
+stack:
+  - { id: 0, name: alloca, size: 1, alignment: 1 }
+machineFunctionInfo: {}
+body: |
+  bb.1 (%ir-block.0):
+    liveins: $d0
+
+    ; CHECK-LABEL: name: truncating_f8
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY [[COPY]].hsub
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr8 = COPY [[COPY1]]
+    ; CHECK: STRBui [[COPY2]], %stack.0.alloca, 0 :: (store (s8) into %ir.alloca)
+    ; CHECK: RET_ReallyLR
+    %0:fpr(s64) = COPY $d0
+    %1:gpr(p0) = G_FRAME_INDEX %stack.0.alloca
+    G_STORE %0(s64), %1(p0) :: (store (s8) into %ir.alloca)
+    RET_ReallyLR
+
+...

llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir

+4 −4
@@ -278,13 +278,13 @@ body: |
     ; CHECK-LABEL: name: test_rule96_id2146_at_idx8070
     ; CHECK: liveins: $x0
     ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
-    ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s1))
+    ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s8))
     ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[LDRBui]]
-    ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 0
+    ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 7
     ; CHECK: $noreg = PATCHABLE_RET [[UBFMWri]]
     %2:gpr(p0) = COPY $x0
-    %0:fpr(s1) = G_LOAD %2(p0) :: (load (s1))
-    %1:gpr(s32) = G_ZEXT %0(s1)
+    %0:fpr(s8) = G_LOAD %2(p0) :: (load (s8))
+    %1:gpr(s32) = G_ZEXT %0(s8)
     $noreg = PATCHABLE_RET %1(s32)
 
 ...