Skip to content

Commit d6d9aca

Browse files
bors[bot]ptersilie
andauthored
61: Fix changes to stackmaps made by the register allocator. r=ltratt a=ptersilie Co-authored-by: Lukas Diekmann <[email protected]>
2 parents 6e9fe3e + f4c038b commit d6d9aca

File tree

8 files changed

+326
-1
lines changed

8 files changed

+326
-1
lines changed

llvm/include/llvm/CodeGen/Passes.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,11 @@ namespace llvm {
571571
/// caller saved registers with stack slots.
572572
extern char &FixupStatepointCallerSavedID;
573573

574+
/// This pass fixes stackmaps by moving the STACKMAP instruction back to its
575+
/// pre-regalloc location, and reverting its operands back to the original
576+
/// values (before spill reloads).
577+
extern char &FixStackmapsSpillReloadsID;
578+
574579
/// The pass transforms load/store <256 x i32> to AMX load/store intrinsics
575580
/// or split the data to two <128 x i32>.
576581
FunctionPass *createX86LowerAMXTypePass();

llvm/include/llvm/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ void initializeFinalizeISelPass(PassRegistry&);
139139
void initializeFinalizeMachineBundlesPass(PassRegistry&);
140140
void initializeFixIrreduciblePass(PassRegistry &);
141141
void initializeFixupStatepointCallerSavedPass(PassRegistry&);
142+
void initializeFixStackmapsSpillReloadsPass(PassRegistry&);
142143
void initializeFlattenCFGLegacyPassPass(PassRegistry &);
143144
void initializeFloat2IntLegacyPassPass(PassRegistry&);
144145
void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&);

llvm/lib/CodeGen/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ add_llvm_component_library(LLVMCodeGen
216216
StackFrameLayoutAnalysisPass.cpp
217217
StackMapLivenessAnalysis.cpp
218218
StackMaps.cpp
219+
Yk/FixStackmapsSpillReloads.cpp
219220
StackProtector.cpp
220221
StackSlotColoring.cpp
221222
SwiftErrorValueTracking.cpp

llvm/lib/CodeGen/CodeGen.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
5454
initializeFinalizeISelPass(Registry);
5555
initializeFinalizeMachineBundlesPass(Registry);
5656
initializeFixupStatepointCallerSavedPass(Registry);
57+
initializeFixStackmapsSpillReloadsPass(Registry);
5758
initializeFuncletLayoutPass(Registry);
5859
initializeGCMachineCodeAnalysisPass(Registry);
5960
initializeGCModuleInfoPass(Registry);

llvm/lib/CodeGen/TargetPassConfig.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@
5656

5757
using namespace llvm;
5858

59+
extern bool YkStackmapsSpillReloadsFix;
60+
5961
static cl::opt<bool>
6062
EnableIPRA("enable-ipra", cl::init(false), cl::Hidden,
6163
cl::desc("Enable interprocedural register allocation "
@@ -1230,6 +1232,13 @@ void TargetPassConfig::addMachinePasses() {
12301232
// Expand pseudo instructions before second scheduling pass.
12311233
addPass(&ExpandPostRAPseudosID);
12321234

1235+
// Add pass to revert stackmap instructions altered by register allocation.
1236+
// We need to insert this pass late so that spill offsets will have been
1237+
// calculated.
1238+
if (YkStackmapsSpillReloadsFix) {
1239+
addPass(&FixStackmapsSpillReloadsID);
1240+
}
1241+
12331242
// Run pre-sched2 passes.
12341243
addPreSched2();
12351244

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
//===-- FixStackmapsSpillReloads.cpp - Fix spills before stackmaps --------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass fixes stackmaps in regards to spill reloads inserted by the
10+
// register allocator. For example, if we have the LLVM IR
11+
//
12+
// call foo($10, $11)
13+
// call llvm.experimental.stackmap(1, 0, $8, $9)
14+
//
15+
// After register allocation we might get something like
16+
//
17+
// movrr $rbx, $rsi
18+
// movmr $rbp, -8, $rdi
19+
// ...
20+
// call foo($rsi, $rdi)
21+
// movrr $rsi, $rbx
22+
// movrm $rdi, $rbp, -8
23+
// STACKMAP $rsi, $rdi
24+
//
25+
// In order to pass arguments to foo, the register allocator had to spill the
26+
// values in $rdi and $rsi into another register or onto the stack before the
27+
// call. Then immediately after the call it inserted instructions to reload
28+
// the spilled values back into the original registers. Since during
29+
// deoptimisation we return to immediately after the call, the stackmap is now
30+
// tracking the wrong values, e.g. in this case $rdi and $rsi instead of the
31+
// spill locations.
32+
//
33+
// This pass iterates over all basic blocks, finds spill reloads inserted
34+
// in between a call and stackmap, replaces the stackmap operands with the
35+
// spill reloads, and then moves the stackmap instruction up just below the
36+
// call.
37+
//===----------------------------------------------------------------------===//
38+
39+
#include "llvm/CodeGen/MachineBasicBlock.h"
40+
#include "llvm/CodeGen/MachineFrameInfo.h"
41+
#include "llvm/CodeGen/MachineFunction.h"
42+
#include "llvm/CodeGen/MachineFunctionPass.h"
43+
#include "llvm/CodeGen/MachineInstr.h"
44+
#include "llvm/CodeGen/MachineInstrBuilder.h"
45+
#include "llvm/CodeGen/MachineOperand.h"
46+
#include "llvm/CodeGen/Passes.h"
47+
#include "llvm/CodeGen/StackMaps.h"
48+
#include "llvm/CodeGen/TargetInstrInfo.h"
49+
#include "llvm/IR/DebugLoc.h"
50+
#include "llvm/InitializePasses.h"
51+
#include "llvm/Support/Debug.h"
52+
53+
using namespace llvm;
54+
55+
#define DEBUG_TYPE "fix-stackmaps-spill-reloads"
56+
57+
namespace {
58+
59+
class FixStackmapsSpillReloads : public MachineFunctionPass {
60+
public:
61+
static char ID;
62+
63+
FixStackmapsSpillReloads() : MachineFunctionPass(ID) {
64+
initializeFixStackmapsSpillReloadsPass(*PassRegistry::getPassRegistry());
65+
}
66+
67+
void getAnalysisUsage(AnalysisUsage &AU) const override {
68+
AU.setPreservesCFG();
69+
MachineFunctionPass::getAnalysisUsage(AU);
70+
}
71+
72+
StringRef getPassName() const override {
73+
return "Stackmaps Fix Post RegAlloc Pass";
74+
}
75+
76+
bool runOnMachineFunction(MachineFunction &MF) override;
77+
};
78+
79+
} // namespace
80+
81+
char FixStackmapsSpillReloads::ID = 0;
82+
char &llvm::FixStackmapsSpillReloadsID = FixStackmapsSpillReloads::ID;
83+
84+
INITIALIZE_PASS_BEGIN(FixStackmapsSpillReloads, DEBUG_TYPE, "Fixup Stackmap Spills",
85+
false, false)
86+
INITIALIZE_PASS_END(FixStackmapsSpillReloads, DEBUG_TYPE, "Fixup Stackmap Spills",
87+
false, false)
88+
89+
90+
bool FixStackmapsSpillReloads::runOnMachineFunction(MachineFunction &MF) {
91+
bool Changed = false;
92+
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
93+
for (MachineBasicBlock &MBB : MF) {
94+
bool Collect = false;
95+
std::set<MachineInstr *> Erased;
96+
MachineInstr *LastCall = nullptr;
97+
std::map<Register, MachineInstr *> Spills;
98+
for (MachineInstr &MI : MBB) {
99+
if (MI.isCall() && !MI.isInlineAsm()) {
100+
// YKFIXME: Do we need to check for intrinsics here or have they been
101+
// removed during lowering?
102+
if (MI.getOpcode() != TargetOpcode::STACKMAP &&
103+
MI.getOpcode() != TargetOpcode::PATCHPOINT) {
104+
// If we see a normal function call we know it will be followed by a
105+
// STACKMAP instruction. Set `Collect` to `true` to collect all spill
106+
// reload instructions between this call and the STACKMAP instruction.
107+
// Also remember this call, so we can insert the new STACKMAP
108+
// instruction right below it.
109+
Collect = true;
110+
LastCall = &MI;
111+
Spills.clear();
112+
continue;
113+
}
114+
}
115+
116+
if (MI.getOpcode() == TargetOpcode::STACKMAP) {
117+
if (LastCall == nullptr) {
118+
// There wasn't a call preceeding this stackmap, so this must be
119+
// attached to a branch instruction.
120+
continue;
121+
}
122+
Collect = false;
123+
// Assemble a new stackmap instruction by copying over the operands of
124+
// the old instruction to the new one, while replacing spilled operands
125+
// as we go.
126+
MachineInstr *NewMI =
127+
MF.CreateMachineInstr(TII->get(TargetOpcode::STACKMAP), MI.getDebugLoc(), true);
128+
MachineInstrBuilder MIB(MF, NewMI);
129+
// Copy ID and shadow
130+
auto *MOI = MI.operands_begin();
131+
MIB.add(*MOI); // ID
132+
MOI++;
133+
MIB.add(*MOI); // Shadow
134+
MOI++;
135+
while (MOI != MI.operands_end()) {
136+
if (MOI->isReg()) {
137+
Register Reg = MOI->getReg();
138+
// Check if the register operand in the stackmap is a restored
139+
// spill.
140+
if (Spills.count(Reg) > 0) {
141+
// Get spill reload instruction
142+
MachineInstr *SMI = Spills[Reg];
143+
int FI;
144+
if (TII->isCopyInstr(*SMI)) {
145+
// If the reload is a simple copy, e.g. $rax = $rbx,
146+
// just replace the stackmap operand with the source of the
147+
// copy instruction.
148+
MIB.add(SMI->getOperand(1));
149+
} else if (TII->isLoadFromStackSlotPostFE(*SMI, FI)) {
150+
// If the reload is a load from the stack, replace the operand
151+
// with multiple operands describing a stack location.
152+
MIB.addImm(StackMaps::IndirectMemRefOp);
153+
std::optional<unsigned> Size = SMI->getRestoreSize(TII);
154+
assert(Size.has_value() && "RestoreSize has no value.");
155+
MIB.addImm(Size.value()); // Size
156+
MIB.add(SMI->getOperand(1)); // Register
157+
MIB.add(SMI->getOperand(4)); // Offset
158+
} else {
159+
assert(false && "Unknown instruction found");
160+
}
161+
} else {
162+
MIB.add(*MOI);
163+
}
164+
MOI++;
165+
continue;
166+
}
167+
// Copy all other operands over as is.
168+
MIB.add(*MOI);
169+
switch (MOI->getImm()) {
170+
default:
171+
llvm_unreachable("Unrecognized operand type.");
172+
case StackMaps::DirectMemRefOp: {
173+
MOI++;
174+
MIB.add(*MOI); // Register
175+
MOI++;
176+
MIB.add(*MOI); // Offset
177+
break;
178+
}
179+
case StackMaps::IndirectMemRefOp: {
180+
MOI++;
181+
MIB.add(*MOI); // Size
182+
MOI++;
183+
MIB.add(*MOI); // Register
184+
MOI++;
185+
MIB.add(*MOI); // Offset
186+
break;
187+
}
188+
case StackMaps::ConstantOp: {break;}
189+
case StackMaps::NextLive: {break;}
190+
}
191+
MOI++;
192+
}
193+
// Insert the new stackmap instruction just after the last call.
194+
MI.getParent()->insertAfter(LastCall, NewMI);
195+
// Remember the old stackmap instruction for deletion later.
196+
Erased.insert(&MI);
197+
LastCall = nullptr;
198+
Changed = true;
199+
}
200+
201+
// Collect spill reloads that appear between a call and its corresponding
202+
// STACKMAP instruction.
203+
if (Collect) {
204+
int FI;
205+
if (TII->isCopyInstr(MI) || TII->isLoadFromStackSlotPostFE(MI, FI)) {
206+
Spills[MI.getOperand(0).getReg()] = &MI;
207+
}
208+
}
209+
}
210+
// Remove old stackmap instructions.
211+
for (MachineInstr *E : Erased) {
212+
E->eraseFromParent();
213+
}
214+
}
215+
216+
return Changed;
217+
}

llvm/lib/Support/Yk.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,19 @@ bool YkStackMapOffsetFix;
2323
static cl::opt<bool, true> YkStackMapOffsetFixParser(
2424
"yk-stackmap-offset-fix",
2525
cl::desc("Apply a fix to stackmaps that corrects the reported instruction "
26-
"offset in the presence of calls."),
26+
"offset in the presence of calls. (deprecated by "
27+
"yk-stackmap-spillreloads-fix)"),
2728
cl::NotHidden, cl::location(YkStackMapOffsetFix));
2829

2930
bool YkStackMapAdditionalLocs;
3031
static cl::opt<bool, true> YkStackMapAdditionalLocsParser(
3132
"yk-stackmap-add-locs",
3233
cl::desc("Encode additional locations for registers into stackmaps."),
3334
cl::NotHidden, cl::location(YkStackMapAdditionalLocs));
35+
36+
bool YkStackmapsSpillReloadsFix;
37+
static cl::opt<bool, true> YkStackMapSpillFixParser(
38+
"yk-stackmap-spillreloads-fix",
39+
cl::desc("Revert stackmaps and its operands after the register allocator "
40+
"has emitted spill reloads."),
41+
cl::NotHidden, cl::location(YkStackmapsSpillReloadsFix));
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
; RUN: llc -stop-after fix-stackmaps-spill-reloads --yk-stackmap-spillreloads-fix < %s | FileCheck %s
2+
3+
; CHECK-LABEL: name: main
4+
; CHECK-LABEL: bb.0 (%ir-block.1):
5+
; CHECK-LABEL: CALL64pcrel32 target-flags(x86-plt) @foo2,
6+
; CHECK-NEXT: STACKMAP 1, 0, renamable $ebx, 3, renamable $r14d, 3, 1, 4, $rbp, -48, 3, renamable $r12d, 3, 1, 4, $rbp, -52, 3, renamable $r15d, 3, renamable $r13d, 3, implicit-def dead early-clobber $r11
7+
8+
@.str = private unnamed_addr constant [13 x i8] c"%d %d %d %d\0A\00", align 1
9+
10+
define dso_local i32 @foo(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3, i32 noundef %4, i32 noundef %5, i32 noundef %6) #0 {
11+
%8 = alloca i32, align 4
12+
%9 = alloca i32, align 4
13+
%10 = alloca i32, align 4
14+
%11 = alloca i32, align 4
15+
%12 = alloca i32, align 4
16+
%13 = alloca i32, align 4
17+
%14 = alloca i32, align 4
18+
%15 = alloca i32, align 4
19+
store i32 %0, ptr %9, align 4
20+
store i32 %1, ptr %10, align 4
21+
store i32 %2, ptr %11, align 4
22+
store i32 %3, ptr %12, align 4
23+
store i32 %4, ptr %13, align 4
24+
store i32 %5, ptr %14, align 4
25+
store i32 %6, ptr %15, align 4
26+
%16 = load i32, ptr %9, align 4
27+
%17 = load i32, ptr %10, align 4
28+
%18 = load i32, ptr %11, align 4
29+
%19 = load i32, ptr %12, align 4
30+
%20 = load i32, ptr %13, align 4
31+
%21 = load i32, ptr %14, align 4
32+
%22 = load i32, ptr %15, align 4
33+
%23 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %16, i32 noundef %17, i32 noundef %18, i32 noundef %19, i32 noundef %20, i32 noundef %21, i32 noundef %22)
34+
%24 = load i32, ptr %8, align 4
35+
ret i32 %24
36+
}
37+
38+
declare i32 @printf(ptr noundef, ...) #2
39+
40+
define dso_local i32 @main(i32 noundef %0) #0 {
41+
%2 = alloca i32, align 4
42+
%3 = alloca i32, align 4
43+
%4 = alloca i32, align 4
44+
%5 = alloca i32, align 4
45+
%6 = alloca i32, align 4
46+
%7 = alloca i32, align 4
47+
%8 = alloca i32, align 4
48+
%9 = alloca i32, align 4
49+
store i32 %0, ptr %2, align 4
50+
%10 = load i32, ptr %2, align 4
51+
%11 = mul nsw i32 %10, 1
52+
store i32 %11, ptr %3, align 4
53+
%12 = load i32, ptr %2, align 4
54+
%13 = mul nsw i32 %12, 2
55+
store i32 %13, ptr %4, align 4
56+
%14 = load i32, ptr %2, align 4
57+
%15 = mul nsw i32 %14, 3
58+
store i32 %15, ptr %5, align 4
59+
%16 = load i32, ptr %2, align 4
60+
%17 = mul nsw i32 %16, 4
61+
store i32 %17, ptr %6, align 4
62+
%18 = load i32, ptr %2, align 4
63+
%19 = mul nsw i32 %18, 5
64+
store i32 %19, ptr %7, align 4
65+
%20 = load i32, ptr %2, align 4
66+
%21 = mul nsw i32 %20, 6
67+
store i32 %21, ptr %8, align 4
68+
%22 = load i32, ptr %2, align 4
69+
%23 = mul nsw i32 %22, 7
70+
store i32 %23, ptr %9, align 4
71+
%24 = call i32 @foo2(i32 noundef %23, i32 noundef %21, i32 noundef %19, i32 noundef %17, i32 noundef %15, i32 noundef %13, i32 noundef %11)
72+
call void (i64, i32, ...) @llvm.experimental.stackmap(i64 1, i32 0, i32 %11, i32 %13, i32 %15, i32 %17, i32 %19, i32 %21, i32 %23)
73+
%25 = mul nsw i32 %23, 5
74+
%26 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %11, i32 noundef %13, i32 noundef %15, i32 noundef %17, i32 noundef %19, i32 noundef %21, i32 noundef %25)
75+
ret i32 0
76+
}
77+
78+
declare void @foo2(...)
79+
declare void @llvm.experimental.stackmap(i64, i32, ...)
80+
81+
attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
82+
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
83+
attributes #2 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }

0 commit comments

Comments
 (0)