Skip to content

Commit bdcf3ef

Browse files
[release/9.0-preview7] JIT: Fix placement of GT_START_NOGC for tailcalls in face of bulk copy with write barrier calls (#105572)
* JIT: Fix placement of `GT_START_NOGC` for tailcalls in face of bulk copy with write barrier calls When the JIT generates code for a tailcall it must generate code to write the arguments into the incoming parameter area. Since the GC-ness of the arguments of the tailcall may not match the GC-ness of the parameters, we have to disable GC before we start writing these. This is done by finding the earliest `GT_PUTARG_STK` node and placing the start of the NOGC region right before it. In addition, there is logic to take care of potential overlap between the arguments and parameters. For example, if the call has an operand that uses one of the parameters, then we must take care that we do not overwrite that parameter with the tailcall argument before the use of it. To do so, we sometimes may need to introduce copies from the parameter locals to locals on the stack frame. This used to work fine; however, with #101761 we started transforming block copies into managed calls in certain scenarios. It was possible for the JIT to decide to introduce a copy to a local and for this transformation to then kick in. This would cause us to end up with the managed helper call after starting the NOGC region. In checked builds this would hit an assert during GC scan; in release builds, it would end up with corrupted data. The fix here is to make sure we insert the `GT_START_NOGC` after all the potential temporary copies we may introduce as part of the tailcall logic. There was an additional assumption that the first `PUTARG_STK` operand was the earliest one in execution order. That is not guaranteed, so this change stops relying on that as well by introducing a new `LIR::FirstNode` and using that to determine the earliest `PUTARG_STK` node. Fix #102370 Fix #104123 Fix #105441 --------- Co-authored-by: Jakob Botsch Nielsen <[email protected]>
1 parent bb63da9 commit bdcf3ef

File tree

3 files changed

+49
-39
lines changed

3 files changed

+49
-39
lines changed

src/coreclr/jit/lir.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1872,6 +1872,22 @@ GenTree* LIR::LastNode(GenTree** nodes, size_t numNodes)
18721872
return lastNode;
18731873
}
18741874

1875+
//------------------------------------------------------------------------
1876+
// LIR::FirstNode:
1877+
// Given two nodes in the same block range, find which node appears first.
1878+
//
1879+
// Arguments:
1880+
// node1 - The first node
1881+
// node2 - The second node
1882+
//
1883+
// Returns:
1884+
// Node that appears first.
1885+
//
1886+
GenTree* LIR::FirstNode(GenTree* node1, GenTree* node2)
1887+
{
1888+
return LastNode(node1, node2) == node1 ? node2 : node1;
1889+
}
1890+
18751891
#ifdef DEBUG
18761892
void GenTree::dumpLIRFlags()
18771893
{

src/coreclr/jit/lir.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,7 @@ class LIR final
317317

318318
static GenTree* LastNode(GenTree* node1, GenTree* node2);
319319
static GenTree* LastNode(GenTree** nodes, size_t numNodes);
320+
static GenTree* FirstNode(GenTree* node1, GenTree* node2);
320321
};
321322

322323
inline void GenTree::SetUnusedValue()

src/coreclr/jit/lower.cpp

Lines changed: 32 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -3057,52 +3057,22 @@ void Lowering::LowerFastTailCall(GenTreeCall* call)
30573057
// call could over-write the stack arg that is setup earlier.
30583058
ArrayStack<GenTree*> putargs(comp->getAllocator(CMK_ArrayStack));
30593059

3060-
for (CallArg& arg : call->gtArgs.EarlyArgs())
3061-
{
3062-
if (arg.GetEarlyNode()->OperIs(GT_PUTARG_STK))
3063-
{
3064-
putargs.Push(arg.GetEarlyNode());
3065-
}
3066-
}
3067-
3068-
for (CallArg& arg : call->gtArgs.LateArgs())
3060+
for (CallArg& arg : call->gtArgs.Args())
30693061
{
3070-
if (arg.GetLateNode()->OperIs(GT_PUTARG_STK))
3062+
if (arg.GetNode()->OperIs(GT_PUTARG_STK))
30713063
{
3072-
putargs.Push(arg.GetLateNode());
3064+
putargs.Push(arg.GetNode());
30733065
}
30743066
}
30753067

30763068
GenTree* startNonGCNode = nullptr;
30773069
if (!putargs.Empty())
30783070
{
3079-
// Get the earliest operand of the first PUTARG_STK node. We will make
3080-
// the required copies of args before this node.
3081-
bool unused;
3082-
GenTree* insertionPoint = BlockRange().GetTreeRange(putargs.Bottom(), &unused).FirstNode();
3083-
// Insert GT_START_NONGC node before we evaluate the PUTARG_STK args.
3084-
// Note that if there are no args to be setup on stack, no need to
3085-
// insert GT_START_NONGC node.
3086-
startNonGCNode = new (comp, GT_START_NONGC) GenTree(GT_START_NONGC, TYP_VOID);
3087-
BlockRange().InsertBefore(insertionPoint, startNonGCNode);
3088-
3089-
// Gc-interruptability in the following case:
3090-
// foo(a, b, c, d, e) { bar(a, b, c, d, e); }
3091-
// bar(a, b, c, d, e) { foo(a, b, d, d, e); }
3092-
//
3093-
// Since the instruction group starting from the instruction that sets up first
3094-
// stack arg to the end of the tail call is marked as non-gc interruptible,
3095-
// this will form a non-interruptible tight loop causing gc-starvation. To fix
3096-
// this we insert GT_NO_OP as embedded stmt before GT_START_NONGC, if the method
3097-
// has a single basic block and is not a GC-safe point. The presence of a single
3098-
// nop outside non-gc interruptible region will prevent gc starvation.
3099-
if ((comp->fgBBcount == 1) && !comp->compCurBB->HasFlag(BBF_GC_SAFE_POINT))
3071+
GenTree* firstPutargStk = putargs.Bottom(0);
3072+
for (int i = 1; i < putargs.Height(); i++)
31003073
{
3101-
assert(comp->fgFirstBB == comp->compCurBB);
3102-
GenTree* noOp = new (comp, GT_NO_OP) GenTree(GT_NO_OP, TYP_VOID);
3103-
BlockRange().InsertBefore(startNonGCNode, noOp);
3074+
firstPutargStk = LIR::FirstNode(firstPutargStk, putargs.Bottom(i));
31043075
}
3105-
31063076
// Since this is a fast tailcall each PUTARG_STK will place the argument in the
31073077
// _incoming_ arg space area. This will effectively overwrite our already existing
31083078
// incoming args that live in that area. If we have later uses of those args, this
@@ -3172,10 +3142,10 @@ void Lowering::LowerFastTailCall(GenTreeCall* call)
31723142
GenTree* lookForUsesFrom = put->gtNext;
31733143
if (overwrittenStart != argStart)
31743144
{
3175-
lookForUsesFrom = insertionPoint;
3145+
lookForUsesFrom = firstPutargStk;
31763146
}
31773147

3178-
RehomeArgForFastTailCall(callerArgLclNum, insertionPoint, lookForUsesFrom, call);
3148+
RehomeArgForFastTailCall(callerArgLclNum, firstPutargStk, lookForUsesFrom, call);
31793149
// The above call can introduce temps and invalidate the pointer.
31803150
callerArgDsc = comp->lvaGetDesc(callerArgLclNum);
31813151

@@ -3189,10 +3159,33 @@ void Lowering::LowerFastTailCall(GenTreeCall* call)
31893159
unsigned int fieldsEnd = fieldsFirst + callerArgDsc->lvFieldCnt;
31903160
for (unsigned int j = fieldsFirst; j < fieldsEnd; j++)
31913161
{
3192-
RehomeArgForFastTailCall(j, insertionPoint, lookForUsesFrom, call);
3162+
RehomeArgForFastTailCall(j, firstPutargStk, lookForUsesFrom, call);
31933163
}
31943164
}
31953165
}
3166+
3167+
// Now insert GT_START_NONGC node before we evaluate the first PUTARG_STK.
3168+
// Note that if there are no args to be setup on stack, no need to
3169+
// insert GT_START_NONGC node.
3170+
startNonGCNode = new (comp, GT_START_NONGC) GenTree(GT_START_NONGC, TYP_VOID);
3171+
BlockRange().InsertBefore(firstPutargStk, startNonGCNode);
3172+
3173+
// Gc-interruptability in the following case:
3174+
// foo(a, b, c, d, e) { bar(a, b, c, d, e); }
3175+
// bar(a, b, c, d, e) { foo(a, b, d, d, e); }
3176+
//
3177+
// Since the instruction group starting from the instruction that sets up first
3178+
// stack arg to the end of the tail call is marked as non-gc interruptible,
3179+
// this will form a non-interruptible tight loop causing gc-starvation. To fix
3180+
// this we insert GT_NO_OP as embedded stmt before GT_START_NONGC, if the method
3181+
// has a single basic block and is not a GC-safe point. The presence of a single
3182+
// nop outside non-gc interruptible region will prevent gc starvation.
3183+
if ((comp->fgBBcount == 1) && !comp->compCurBB->HasFlag(BBF_GC_SAFE_POINT))
3184+
{
3185+
assert(comp->fgFirstBB == comp->compCurBB);
3186+
GenTree* noOp = new (comp, GT_NO_OP) GenTree(GT_NO_OP, TYP_VOID);
3187+
BlockRange().InsertBefore(startNonGCNode, noOp);
3188+
}
31963189
}
31973190

31983191
// Insert GT_PROF_HOOK node to emit profiler tail call hook. This should be

0 commit comments

Comments
 (0)