
Commit 57115ce

[RISC-V] Move comparison transformations from codegen to lowering (#118270)
Codegen was not a good place for these transformations; it resulted in contorted logic and complicated register allocation. Every type of comparison benefits (branch, floating-point, and integer), but this is especially true for non-branch integer comparisons: only less-than is available, so any other comparison must be achieved with additional operations, which are much easier to insert and transform at the node level. Inserting sign extensions for comparands (and recognizing when they are necessary) is also easier at the node level. This PR introduces only simple local checks; more comprehensive sign-extension elimination will be attempted in subsequent PRs. Part of #84834, cc @dotnet/samsung
1 parent c928a95 commit 57115ce
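
Editorial note (not part of the commit): RISC-V only materializes "less than" into a register (slt/sltu and their immediate forms), so every other register-valued relop has to be rebuilt from it, and equality goes through a subtraction (or XOR) against zero. The standalone, hypothetical C++ sketch below only asserts the arithmetic identities the new lowering relies on; the instruction names in the comments are the usual RISC-V idioms, not output of this PR.

// Hypothetical sanity check of the compare rewrites; not JIT code.
#include <cassert>
#include <cstdint>

int main()
{
    const int64_t C = 100; // an arbitrary simm12 constant away from overflow edges
    for (int64_t a : {-2048LL, -101LL, -1LL, 0LL, 1LL, 99LL, 100LL, 101LL})
    {
        // a == C  --->  (a - C) == 0, e.g. addi t, a, -C ; sltiu t, t, 1 (seqz)
        assert((a == C) == ((a - C) == 0));

        // C == -2048: the negated constant 2048 no longer fits in 12 bits,
        // so (a - (-2048)) == 0  --->  (a ^ -2048) == 0 instead.
        assert((a == -2048) == ((a ^ -2048) == 0));

        // a <= C  --->  a < C + 1   (the PR's "a <= C ---> a < C+1")
        assert((a <= C) == (a < C + 1));

        // a >= C  --->  a > C - 1   (the PR's "a >= C ---> a > C-1")
        assert((a >= C) == (a > C - 1));

        // signed a < 0  --->  shift the sign bit into the lowest bit (srli t, a, 63)
        assert((a < 0) == (((uint64_t)a >> 63) != 0));
    }
    return 0;
}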

8 files changed: +274 −384 lines changed


src/coreclr/jit/codegenriscv64.cpp

Lines changed: 78 additions & 286 deletions
Large diffs are not rendered by default.

src/coreclr/jit/emitriscv64.cpp

Lines changed: 1 addition & 10 deletions
@@ -738,7 +738,7 @@ void emitter::emitIns_R_R_I(
         (INS_lb <= ins && INS_lhu >= ins) || INS_ld == ins || INS_lw == ins || INS_jalr == ins || INS_fld == ins ||
         INS_flw == ins || INS_slli_uw == ins || INS_rori == ins || INS_roriw == ins)
     {
-        assert(isGeneralRegister(reg2));
+        assert(isGeneralRegisterOrR0(reg2));
         code |= (reg1 & 0x1f) << 7; // rd
         code |= reg2 << 15;         // rs1
         code |= imm << 20;          // imm
@@ -5433,15 +5433,6 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
     // n * n bytes will store n bytes result
     emitIns_R_R_R(ins, attr, dstReg, src1Reg, src2Reg);

-    if ((dst->gtFlags & GTF_UNSIGNED) != 0)
-    {
-        if (attr == EA_4BYTE)
-        {
-            emitIns_R_R_I(INS_slli, EA_8BYTE, dstReg, dstReg, 32);
-            emitIns_R_R_I(INS_srli, EA_8BYTE, dstReg, dstReg, 32);
-        }
-    }
-
     if (needCheckOv)
     {
         assert(tempReg != dstReg);
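
Editorial context for the deleted block above (not from the commit itself): the slli/srli pair by 32 is the usual RV64 idiom for zero-extending the low 32 bits of a register, here applied to the result of an unsigned 4-byte multiply. A tiny hypothetical sketch of what the removed sequence computed:

// Hypothetical illustration of the deleted slli/srli pair (not JIT code):
// shifting left then logically right by 32 keeps only the low 32 bits,
// i.e. it zero-extends a 32-bit result held in a 64-bit register.
#include <cassert>
#include <cstdint>

int main()
{
    uint64_t reg  = 0xFFFFFFFF80000001ull; // some 64-bit register value
    uint64_t zext = (reg << 32) >> 32;     // slli rd, rd, 32 ; srli rd, rd, 32
    assert(zext == 0x0000000080000001ull);
    return 0;
}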

src/coreclr/jit/lower.cpp

Lines changed: 13 additions & 2 deletions
@@ -4507,7 +4507,18 @@ GenTree* Lowering::LowerCompare(GenTree* cmp)
             cmp->gtFlags |= GTF_UNSIGNED;
         }
     }
-#endif // TARGET_XARCH
+#elif defined(TARGET_RISCV64)
+    if (varTypeUsesIntReg(cmp->gtGetOp1()))
+    {
+        if (GenTree* next = LowerSavedIntegerCompare(cmp); next != cmp)
+            return next;
+
+        // Integer comparisons are full-register only.
+        SignExtendIfNecessary(&cmp->AsOp()->gtOp1);
+        SignExtendIfNecessary(&cmp->AsOp()->gtOp2);
+    }
+#endif // TARGET_RISCV64
+
     ContainCheckCompare(cmp->AsOp());
     return cmp->gtNext;
 }
@@ -7785,7 +7796,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
         {
             divMod->ChangeOper(GT_GE);
             divMod->gtFlags |= GTF_UNSIGNED;
-            ContainCheckNode(divMod);
+            LowerNode(divMod);
             return true;
         }
     }
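
An illustrative aside on the two SignExtendIfNecessary calls added above (not part of the diff): RV64 register comparisons such as slt always consider all 64 bits, and 32-bit values are expected to live in registers in sign-extended form, so a comparand whose upper bits are not a copy of bit 31 can flip the result. A minimal hypothetical C++ sketch of that failure mode:

// Hypothetical illustration (not JIT code): a full-register signed compare is
// only correct for 32-bit operands held in sign-extended form.
#include <cassert>
#include <cstdint>

int main()
{
    int32_t a = -1;
    int32_t b = 0;

    // Canonical RV64 form: the int is sign-extended to 64 bits, so a 64-bit
    // signed compare (slt) agrees with the 32-bit comparison.
    int64_t aSext = static_cast<int64_t>(a);                        // 0xFFFFFFFFFFFFFFFF
    int64_t bSext = static_cast<int64_t>(b);                        // 0x0000000000000000
    assert((aSext < bSext) == (a < b));

    // If a producer left the upper 32 bits zeroed instead, the same
    // full-register compare gives the opposite answer.
    int64_t aZext = static_cast<int64_t>(static_cast<uint32_t>(a)); // 0x00000000FFFFFFFF
    assert((aZext < bSext) != (a < b));
    return 0;
}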

src/coreclr/jit/lower.h

Lines changed: 7 additions & 3 deletions
@@ -156,9 +156,13 @@ class Lowering final : public Phase
 #ifndef TARGET_64BIT
     GenTree* DecomposeLongCompare(GenTree* cmp);
 #endif
-    GenTree* OptimizeConstCompare(GenTree* cmp);
-    GenTree* LowerCompare(GenTree* cmp);
-    GenTree* LowerJTrue(GenTreeOp* jtrue);
+    GenTree* OptimizeConstCompare(GenTree* cmp);
+    GenTree* LowerCompare(GenTree* cmp);
+    GenTree* LowerJTrue(GenTreeOp* jtrue);
+#ifdef TARGET_RISCV64
+    GenTree* LowerSavedIntegerCompare(GenTree* cmp);
+    void     SignExtendIfNecessary(GenTree** arg);
+#endif
     GenTree* LowerSelect(GenTreeConditional* cond);
     bool TryLowerConditionToFlagsNode(GenTree* parent,
                                       GenTree* condition,

src/coreclr/jit/lowerriscv64.cpp

Lines changed: 141 additions & 42 deletions
@@ -64,24 +64,19 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const

         switch (parentNode->OperGet())
         {
-            case GT_EQ:
-            case GT_NE:
-                return emitter::isValidSimm12(-immVal) || (immVal == -2048);
-
-            case GT_LE: // a <= N -> a < N+1
-            case GT_GT: // a > N -> !(a <= N) -> !(a < N+1)
-                immVal += 1;
-                FALLTHROUGH;
             case GT_LT:
             case GT_GE:
             case GT_ADD:
             case GT_AND:
             case GT_OR:
             case GT_XOR:
                 return emitter::isValidSimm12(immVal);
-            case GT_JCMP:
-                return true;

+            case GT_EQ:
+            case GT_NE:
+            case GT_GT:
+            case GT_LE:
+            case GT_JCMP:
             case GT_CMPXCHG:
             case GT_XORR:
             case GT_XAND:
@@ -133,57 +128,158 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul)
 //
 GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue)
 {
-    GenTree* op = jtrue->gtGetOp1();
-    GenCondition cond;
-    GenTree* cmpOp1;
-    GenTree* cmpOp2;
-
-    assert(!op->OperIsCompare() || op->OperIsCmpCompare()); // We do not expect any other relops on RISCV64
+    GenTree* cmp = jtrue->gtGetOp1();
+    assert(!cmp->OperIsCompare() || cmp->OperIsCmpCompare()); // We do not expect any other relops on RISCV64

-    if (op->OperIsCompare() && !varTypeIsFloating(op->gtGetOp1()))
+    // for RISCV64's compare and condition-branch instructions, it's very similar to the IL instructions.
+    jtrue->ChangeOper(GT_JCMP);
+    GenTreeOpCC* jcmp = jtrue->AsOpCC();
+    if (cmp->OperIsCompare() && !varTypeIsFloating(cmp->gtGetOp1()))
     {
-        cond = GenCondition::FromRelop(op);
-
-        cmpOp1 = op->gtGetOp1();
-        cmpOp2 = op->gtGetOp2();
-
-        // We will fall through and turn this into a JCMP(op1, op2, kind), but need to remove the relop here.
-        BlockRange().Remove(op);
+        jcmp->gtCondition = GenCondition::FromIntegralRelop(cmp);
+        jcmp->gtOp1       = cmp->gtGetOp1();
+        jcmp->gtOp2       = cmp->gtGetOp2();
+        BlockRange().Remove(cmp);
     }
     else
     {
+        // branch if (cond) ---> branch if (cond != 0)
         GenCondition::Code code = GenCondition::NE;
-        if (op->OperIsCompare() && varTypeIsFloating(op->gtGetOp1()) && (op->gtFlags & GTF_RELOP_NAN_UN) != 0)
+        if (cmp->OperIsCompare() && varTypeIsFloating(cmp->gtGetOp1()) && (cmp->gtFlags & GTF_RELOP_NAN_UN) != 0)
         {
             // Unordered floating-point comparisons are achieved by neg'ing the ordered counterparts. Avoid that by
             // reversing both the FP comparison and the zero-comparison fused with the branch.
-            op->ChangeOper(GenTree::ReverseRelop(op->OperGet()));
-            op->gtFlags &= ~GTF_RELOP_NAN_UN;
+            cmp->ChangeOper(GenTree::ReverseRelop(cmp->OperGet()));
+            cmp->gtFlags &= ~GTF_RELOP_NAN_UN;
             code = GenCondition::EQ;
         }
-        cond = GenCondition(code);
+        jcmp->gtCondition = GenCondition(code);
+        jcmp->gtOp1       = cmp;
+        jcmp->gtOp2       = comp->gtNewZeroConNode(cmp->TypeGet());
+        BlockRange().InsertBefore(jcmp, jcmp->gtOp2);
+    }

-        cmpOp1 = op;
-        cmpOp2 = comp->gtNewZeroConNode(cmpOp1->TypeGet());
+    // Comparisons fused with branches don't have immediates, re-evaluate containment for 'zero' register
+    if (!CheckImmedAndMakeContained(jcmp, jcmp->gtOp2))
+        jcmp->gtOp2->ClearContained();

-        BlockRange().InsertBefore(jtrue, cmpOp2);
+    return jcmp->gtNext;
+}

-        // Fall through and turn this into a JCMP(op1, 0, NE).
-    }
+//------------------------------------------------------------------------
+// LowerSavedIntegerCompare: lowers a integer comparison saved to a register so that it matches the available
+// instructions better
+//
+// Arguments:
+//    cmp - the integer comparison to lower
+//
+// Return Value:
+//    The original compare node if lowering should proceed as usual or the next node to lower if the compare node was
+//    changed in such a way that lowering is no longer needed.
+//
+GenTree* Lowering::LowerSavedIntegerCompare(GenTree* cmp)
+{
+    // Branches have a full range of comparisons, these transformations would be counter-productive
+    LIR::Use cmpUse;
+    if (!BlockRange().TryGetUse(cmp, &cmpUse) || cmpUse.User()->OperIs(GT_JTRUE))
+        return cmp;

-    // for RISCV64's compare and condition-branch instructions,
-    // it's very similar to the IL instructions.
-    jtrue->ChangeOper(GT_JCMP);
-    jtrue->gtOp1 = cmpOp1;
-    jtrue->gtOp2 = cmpOp2;
-    jtrue->AsOpCC()->gtCondition = cond;
+    GenTree*& left  = cmp->AsOp()->gtOp1;
+    GenTree*& right = cmp->AsOp()->gtOp2;
+    assert(cmp->OperIsCmpCompare() && varTypeUsesIntReg(left));
+
+    if (cmp->OperIs(GT_EQ, GT_NE) && !right->IsIntegralConst(0))
+    {
+        // Only equality with zero is supported
+        // a == b ---> (a - b) == 0
+        var_types  type = genActualTypeIsInt(left) ? TYP_INT : TYP_I_IMPL;
+        genTreeOps oper = GT_SUB;
+        if (right->IsIntegralConst() && !right->AsIntCon()->ImmedValNeedsReloc(comp))
+        {
+            INT64 value  = right->AsIntConCommon()->IntegralValue();
+            INT64 minVal = (type == TYP_INT) ? INT_MIN : SSIZE_T_MIN;
+
+            const INT64 min12BitImm = -2048;
+            if (value == min12BitImm)
+            {
+                // (a - C) == 0 ---> (a ^ C) == 0
+                oper = GT_XOR;
+            }
+            else if (!right->TypeIs(TYP_BYREF) && value != minVal)
+            {
+                // a - C ---> a + (-C)
+                oper = GT_ADD;
+                right->AsIntConCommon()->SetIntegralValue(-value);
+            }
+        }
+        left  = comp->gtNewOperNode(oper, type, left, right);
+        right = comp->gtNewZeroConNode(type);
+        BlockRange().InsertBefore(cmp, left, right);
+        ContainCheckBinary(left->AsOp());
+    }

-    if (cmpOp2->IsCnsIntOrI())
+    if (!right->TypeIs(TYP_BYREF) && right->IsIntegralConst() && !right->AsIntConCommon()->ImmedValNeedsReloc(comp))
     {
-        cmpOp2->SetContained();
+        if (cmp->OperIs(GT_LE, GT_GE))
+        {
+            // a <= C ---> a < C+1
+            // a >= C ---> a > C-1
+            INT64 value = right->AsIntConCommon()->IntegralValue();
+
+            bool isOverflow;
+            if (cmp->OperIs(GT_LE))
+            {
+                isOverflow = genActualTypeIsInt(left)
+                                 ? CheckedOps::AddOverflows((INT32)value, (INT32)1, cmp->IsUnsigned())
+                                 : CheckedOps::AddOverflows((INT64)value, (INT64)1, cmp->IsUnsigned());
+            }
+            else
+            {
+                isOverflow = genActualTypeIsInt(left)
+                                 ? CheckedOps::SubOverflows((INT32)value, (INT32)1, cmp->IsUnsigned())
+                                 : CheckedOps::SubOverflows((INT64)value, (INT64)1, cmp->IsUnsigned());
+            }
+            if (!isOverflow)
+            {
+                right->AsIntConCommon()->SetIntegralValue(cmp->OperIs(GT_LE) ? value + 1 : value - 1);
+                cmp->SetOperRaw(cmp->OperIs(GT_LE) ? GT_LT : GT_GT);
+            }
+        }
+
+        if (cmp->OperIs(GT_LT) && right->IsIntegralConst(0) && !cmp->IsUnsigned())
+        {
+            // a < 0 (signed) ---> shift the sign bit into the lowest bit
+            cmp->SetOperRaw(GT_RSZ);
+            cmp->ChangeType(genActualType(left));
+            right->AsIntConCommon()->SetIntegralValue(genTypeSize(cmp) * BITS_PER_BYTE - 1);
+            right->SetContained();
+            return cmp->gtNext;
+        }
     }
+    return cmp;
+}
+
+//------------------------------------------------------------------------
+// SignExtendIfNecessary: inserts a 32-bit sign extension unless the argument is full-register or is known to be
+// implemented with a sign-extending instruction.
+//
+// Arguments:
+//    arg - the argument to sign-extend
+//
+void Lowering::SignExtendIfNecessary(GenTree** arg)
+{
+    assert(varTypeUsesIntReg(*arg));
+    if (!genActualTypeIsInt(*arg))
+        return;
+
+    if ((*arg)->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_MOD, GT_UMOD, GT_DIV, GT_UDIV, GT_CNS_INT))
+        return;

-    return jtrue->gtNext;
+    if ((*arg)->OperIsShiftOrRotate() || (*arg)->OperIsCmpCompare() || (*arg)->OperIsAtomicOp())
+        return;
+
+    *arg = comp->gtNewCastNode(TYP_I_IMPL, *arg, false, TYP_I_IMPL);
+    BlockRange().InsertAfter((*arg)->gtGetOp1(), *arg);
 }

 //------------------------------------------------------------------------
@@ -1140,6 +1236,9 @@ void Lowering::ContainCheckCast(GenTreeCast* node)
 //
 void Lowering::ContainCheckCompare(GenTreeOp* cmp)
 {
+    if (cmp->gtOp1->IsIntegralConst(0) && !cmp->gtOp1->AsIntCon()->ImmedValNeedsReloc(comp))
+        MakeSrcContained(cmp, cmp->gtOp1); // use 'zero' register
+
     CheckImmedAndMakeContained(cmp, cmp->gtOp2);
 }
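
One more illustrative aside (not part of the commit), on the "reversing both the FP comparison and the zero-comparison" comment in LowerJTrue above: RISC-V only has ordered FP compares (feq/flt/fle), which produce 0 when either input is NaN, so an unordered relation is exactly the negation of the reversed ordered one, and the fused branch can simply test the ordered result against zero. A small hypothetical C++ check of that equivalence:

// Hypothetical check (not JIT code): "branch if a < b or unordered" behaves the
// same as evaluating the ordered a >= b and branching when that result is zero.
#include <cassert>
#include <cmath>
#include <limits>

// GenTree semantics of an unordered less-than (GTF_RELOP_NAN_UN set).
static bool unorderedLess(double a, double b)
{
    return (a < b) || std::isnan(a) || std::isnan(b);
}

int main()
{
    const double nan = std::numeric_limits<double>::quiet_NaN();
    const double vals[] = {-1.0, 0.0, 2.5, nan};
    for (double a : vals)
    {
        for (double b : vals)
        {
            // Ordered a >= b, e.g. fle.d t, b, a -- yields 0 on NaN inputs.
            bool orderedGE = (a >= b);
            // Reversing the relop and branching on "== 0" takes the same paths.
            assert(unorderedLess(a, b) == !orderedGE);
        }
    }
    return 0;
}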

src/coreclr/jit/lsra.cpp

Lines changed: 16 additions & 12 deletions
@@ -9065,23 +9065,27 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block)

     if (lastNode->OperIs(GT_JTRUE, GT_JCMP, GT_JTEST))
     {
-        GenTree* op = lastNode->gtGetOp1();
-        consumedRegs |= genSingleTypeRegMask(op->GetRegNum());
-
-        if (op->OperIs(GT_COPY))
-        {
-            GenTree* srcOp = op->gtGetOp1();
-            consumedRegs |= genSingleTypeRegMask(srcOp->GetRegNum());
-        }
-        else if (op->IsLocal())
+        assert(!lastNode->OperIs(GT_JTRUE) || !lastNode->gtGetOp1()->isContained());
+        if (!lastNode->gtGetOp1()->isContained())
         {
-            GenTreeLclVarCommon* lcl = op->AsLclVarCommon();
-            terminatorNodeLclVarDsc = &compiler->lvaTable[lcl->GetLclNum()];
+            GenTree* op = lastNode->gtGetOp1();
+            consumedRegs |= genSingleTypeRegMask(op->GetRegNum());
+
+            if (op->OperIs(GT_COPY))
+            {
+                GenTree* srcOp = op->gtGetOp1();
+                consumedRegs |= genSingleTypeRegMask(srcOp->GetRegNum());
+            }
+            else if (op->IsLocal())
+            {
+                GenTreeLclVarCommon* lcl = op->AsLclVarCommon();
+                terminatorNodeLclVarDsc = &compiler->lvaTable[lcl->GetLclNum()];
+            }
         }

         if (lastNode->OperIs(GT_JCMP, GT_JTEST) && !lastNode->gtGetOp2()->isContained())
         {
-            op = lastNode->gtGetOp2();
+            GenTree* op = lastNode->gtGetOp2();
             consumedRegs |= genSingleTypeRegMask(op->GetRegNum());

             if (op->OperIs(GT_COPY))

src/coreclr/jit/lsrariscv64.cpp

Lines changed: 0 additions & 29 deletions
@@ -202,11 +202,6 @@ int LinearScan::BuildNode(GenTree* tree)
             srcCount = BuildOperandUses(tree->gtGetOp1());
             break;

-        case GT_JTRUE:
-            srcCount = 0;
-            assert(dstCount == 0);
-            break;
-
         case GT_JMP:
             srcCount = 0;
             assert(dstCount == 0);

@@ -440,30 +435,6 @@ int LinearScan::BuildNode(GenTree* tree)
         case GT_LE:
         case GT_GE:
        case GT_GT:
-        {
-            var_types op1Type = genActualType(tree->gtGetOp1()->TypeGet());
-            if (!varTypeIsFloating(op1Type))
-            {
-                emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type));
-                if (cmpSize == EA_4BYTE)
-                {
-                    GenTree* op2 = tree->gtGetOp2();
-
-                    bool isUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0;
-                    bool useAddSub = !(!tree->OperIs(GT_EQ, GT_NE) || op2->IsIntegralConst(-2048));
-                    bool useShiftRight = !isUnsigned && ((tree->OperIs(GT_LT) && op2->IsIntegralConst(0)) ||
-                                                         (tree->OperIs(GT_LE) && op2->IsIntegralConst(-1)));
-                    bool useLoadImm = isUnsigned && ((tree->OperIs(GT_LT, GT_GE) && op2->IsIntegralConst(0)) ||
-                                                     (tree->OperIs(GT_LE, GT_GT) && op2->IsIntegralConst(-1)));
-
-                    if (!useAddSub && !useShiftRight && !useLoadImm)
-                        buildInternalIntRegisterDefForNode(tree);
-                }
-            }
-            buildInternalRegisterUses();
-        }
-            FALLTHROUGH;
-
         case GT_JCMP:
             srcCount = BuildCmp(tree);
             break;
