Skip to content

Commit 9283681

Browse files
committed
[CriticalAntiDepBreaker] Teach the regmask clobber check to check if any subregister is preserved before considering the super register clobbered
X86 has some calling conventions where bits 127:0 of a vector register are callee-saved, but the upper bits aren't. Previously, we could conclude that the full ymm register was clobbered even though its xmm portion was actually preserved. This patch makes the regmask check treat a register as clobbered only if the mask clobbers the register itself and every one of its subregisters; if any subregister is preserved, the super register is no longer considered clobbered. Fixes PR44140. Differential Revision: https://reviews.llvm.org/D70699
1 parent 5d21f75 commit 9283681

File tree

2 files changed

+16
-7
lines changed

2 files changed

+16
-7
lines changed

llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -261,15 +261,25 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
261261
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
262262
MachineOperand &MO = MI.getOperand(i);
263263

264-
if (MO.isRegMask())
265-
for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
266-
if (MO.clobbersPhysReg(i)) {
264+
if (MO.isRegMask()) {
265+
auto ClobbersPhysRegAndSubRegs = [&](unsigned PhysReg) {
266+
for (MCSubRegIterator SRI(PhysReg, TRI, true); SRI.isValid(); ++SRI)
267+
if (!MO.clobbersPhysReg(*SRI))
268+
return false;
269+
270+
return true;
271+
};
272+
273+
for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
274+
if (ClobbersPhysRegAndSubRegs(i)) {
267275
DefIndices[i] = Count;
268276
KillIndices[i] = ~0u;
269277
KeepRegs.reset(i);
270278
Classes[i] = nullptr;
271279
RegRefs.erase(i);
272280
}
281+
}
282+
}
273283

274284
if (!MO.isReg()) continue;
275285
Register Reg = MO.getReg();

llvm/test/CodeGen/X86/pr44140.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ define win64cc void @opaque() {
1010

1111
; We need xmm6 to be live from the loop header across all iterations of the loop.
1212
; We shouldn't clobber ymm6 inside the loop.
13-
; FIXME: We currently clobber ymm6
1413
define i32 @main() {
1514
; CHECK-LABEL: main:
1615
; CHECK: # %bb.0: # %start
@@ -23,18 +22,18 @@ define i32 @main() {
2322
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2423
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0
2524
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1
26-
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm6
25+
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm7
2726
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm2
2827
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm3
2928
; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
3029
; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
3130
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1
3231
; CHECK-NEXT: vmovups %ymm3, {{[0-9]+}}(%rsp)
3332
; CHECK-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp)
34-
; CHECK-NEXT: vmovups %ymm6, {{[0-9]+}}(%rsp)
33+
; CHECK-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp)
3534
; CHECK-NEXT: vmovups %ymm3, {{[0-9]+}}(%rsp)
3635
; CHECK-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp)
37-
; CHECK-NEXT: vmovups %ymm6, {{[0-9]+}}(%rsp)
36+
; CHECK-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp)
3837
; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
3938
; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
4039
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm5

0 commit comments

Comments
 (0)