Skip to content

Commit 25e088f

Browse files
authored
[RISC-V] Improve clamped subtract & increment (#118530)
Improve codegen for the hardcoded subtract clamped to 0 (in localloc) and the increment clamped to the maximum value (in inc_saturate). Part of #84834, cc @dotnet/samsung
1 parent 6549d43 commit 25e088f

File tree

2 files changed

+27
-33
lines changed

2 files changed

+27
-33
lines changed

src/coreclr/jit/codegenriscv64.cpp

Lines changed: 21 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,12 +1006,11 @@ void CodeGen::genCodeForIncSaturate(GenTree* tree)
10061006
// The src must be a register.
10071007
regNumber operandReg = genConsumeReg(operand);
10081008
emitAttr attr = emitActualTypeSize(tree);
1009+
assert(EA_SIZE(attr) == EA_PTRSIZE);
1010+
noway_assert(targetReg != operandReg); // lifetime of the operand register should have been extended
10091011

1010-
BasicBlock* skip = genCreateTempLabel();
1011-
GetEmitter()->emitIns_R_R_I(INS_addi, attr, targetReg, operandReg, 1);
1012-
GetEmitter()->emitIns_J_cond_la(INS_bne, skip, targetReg, REG_R0);
1013-
GetEmitter()->emitIns_R_R(INS_not, attr, targetReg, targetReg);
1014-
genDefineTempLabel(skip);
1012+
GetEmitter()->emitIns_R_R_I(INS_sltiu, attr, targetReg, operandReg, SIZE_T_MAX); // temp = (operand < max) ? 1 : 0;
1013+
GetEmitter()->emitIns_R_R_R(INS_add, attr, targetReg, operandReg, targetReg); // target = operand + temp;
10151014

10161015
genProduceReg(tree);
10171016
}
@@ -1366,6 +1365,7 @@ void CodeGen::genLclHeap(GenTree* tree)
13661365
regNumber targetReg = tree->GetRegNum();
13671366
regNumber regCnt = REG_NA;
13681367
regNumber tempReg = REG_NA;
1368+
regNumber spSourceReg = REG_SPBASE;
13691369
var_types type = genActualType(size->gtType);
13701370
emitAttr easz = emitTypeSize(type);
13711371
BasicBlock* endLabel = nullptr; // can optimize for riscv64.
@@ -1564,44 +1564,31 @@ void CodeGen::genLclHeap(GenTree* tree)
15641564
// case SP is on the last byte of the guard page. Thus you must
15651565
// touch SP-0 first not SP-0x1000.
15661566
//
1567-
//
15681567
// Note that we go through a few hoops so that SP never points to
15691568
// illegal pages at any time during the tickling process.
1570-
//
1571-
// sltu RA, SP, regCnt
1572-
// sub regCnt, SP, regCnt // regCnt now holds ultimate SP
1573-
// beq RA, REG_R0, Skip
1574-
// addi regCnt, REG_R0, 0
1575-
//
1576-
// Skip:
1577-
// lui regPageSize, eeGetPageSize()>>12
1578-
// addi regTmp, SP, 0
1579-
// Loop:
1580-
// lw r0, 0(regTmp) // tickle the page - read from the page
1581-
// sub regTmp, regTmp, regPageSize
1582-
// bgeu regTmp, regCnt, Loop
1583-
//
1584-
// Done:
1585-
// addi SP, regCnt, 0
1586-
//
15871569

15881570
if (tempReg == REG_NA)
15891571
tempReg = internalRegisters.Extract(tree);
15901572

15911573
assert(regCnt != tempReg);
1592-
emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, tempReg, REG_SPBASE, regCnt);
1574+
if (compiler->compOpportunisticallyDependsOn(InstructionSet_Zbb))
1575+
{
1576+
emit->emitIns_R_R_R(INS_maxu, EA_PTRSIZE, tempReg, REG_SPBASE, regCnt);
1577+
emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, regCnt, tempReg, regCnt); // regCnt now holds ultimate SP
1578+
}
1579+
else
1580+
{
1581+
emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, tempReg, REG_SPBASE, regCnt); // temp = overflow ? 1 : 0;
15931582

1594-
// sub regCnt, SP, regCnt // regCnt now holds ultimate SP
1595-
emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt);
1583+
// sub regCnt, SP, regCnt // regCnt now holds ultimate SP
1584+
emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt);
15961585

1597-
// Overflow, set regCnt to lowest possible value
1598-
BasicBlock* skip = genCreateTempLabel();
1599-
emit->emitIns_J_cond_la(INS_beq, skip, tempReg, REG_R0);
1600-
emit->emitIns_R_R(INS_mov, EA_PTRSIZE, regCnt, REG_R0);
1601-
genDefineTempLabel(skip);
1586+
// If overflow, set regCnt to lowest possible value
1587+
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, tempReg, tempReg, -1); // temp = overflow ? 0 : full_mask;
1588+
emit->emitIns_R_R_R(INS_and, EA_PTRSIZE, regCnt, regCnt, tempReg); // cnt = overflow ? 0 : cnt;
1589+
}
16021590

16031591
regNumber rPageSize = internalRegisters.GetSingle(tree);
1604-
16051592
noway_assert(rPageSize != tempReg);
16061593

16071594
emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12);
@@ -1620,6 +1607,7 @@ void CodeGen::genLclHeap(GenTree* tree)
16201607
// we're going to assume the worst and probe.
16211608
// Move the final value to SP
16221609
emit->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt);
1610+
spSourceReg = regCnt; // regCnt may be same as targetReg which gives advantage in returning the address below
16231611
}
16241612

16251613
ALLOC_DONE:
@@ -1647,7 +1635,7 @@ void CodeGen::genLclHeap(GenTree* tree)
16471635
else // stackAdjustment == 0
16481636
{
16491637
// Move the final value of SP to targetReg
1650-
emit->emitIns_R_R(INS_mov, EA_PTRSIZE, targetReg, REG_SPBASE);
1638+
emit->emitIns_Mov(EA_PTRSIZE, targetReg, spSourceReg, true);
16511639
}
16521640

16531641
BAILOUT:

src/coreclr/jit/lsrariscv64.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,12 @@ int LinearScan::BuildNode(GenTree* tree)
758758
BuildDef(tree);
759759
break;
760760

761+
case GT_INC_SATURATE:
762+
assert(dstCount == 1);
763+
srcCount = 1;
764+
setDelayFree(BuildUse(tree->gtGetOp1()));
765+
BuildDef(tree);
766+
break;
761767
} // end switch (tree->OperGet())
762768

763769
if (tree->IsUnusedValue() && (dstCount != 0))

0 commit comments

Comments
 (0)