Skip to content

Commit 445a810

Browse files
committed
ARM64: Fix for Multiplication with Overflow Check
For 4 byte integer multiplication, JIT emits a bad-code which is valid only for 8 byte (i8) multiplication. For the fix, I use ```smull```(signed)/```umull```(unsigned) instructions that contain 8 byte results from 4 byte by 4 byte multiplication. So only one multiplication is needed instead of two for this case. By simply shifting the results, we could get the upper results that is used to detect overflow. Similar transform is made for the unsigned case. Lower is also changed to reserve a register for overflow check. Before smulh w10, w8, w9 --> Incorrect use: smulh is for obtaining the upper bits [127:64] of x8 * x9 mul w8, w8, w9 cmp x10, x8, ASR dotnet#63 After smull x8, x8, x9 --> x8 = w8 * w9 lsr x10, x8, dotnet#32 --> shift the upper bit of x8 to get sign bit cmp w10, w8, ASR #31 --> check sign bit
1 parent 9c84847 commit 445a810

File tree

3 files changed

+50
-34
lines changed

3 files changed

+50
-34
lines changed

src/jit/emitarm64.cpp

Lines changed: 48 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -10824,7 +10824,6 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
1082410824
}
1082510825
bool isMulOverflow = false;
1082610826
bool isUnsignedMul = false;
10827-
instruction ins2 = INS_invalid;
1082810827
regNumber extraReg = REG_NA;
1082910828
if (dst->gtOverflowEx())
1083010829
{
@@ -10840,7 +10839,6 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
1084010839
{
1084110840
isMulOverflow = true;
1084210841
isUnsignedMul = ((dst->gtFlags & GTF_UNSIGNED) != 0);
10843-
ins2 = isUnsignedMul ? INS_umulh : INS_smulh;
1084410842
assert(intConst == nullptr); // overflow format doesn't support an int constant operand
1084510843
}
1084610844
else
@@ -10856,43 +10854,66 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
1085610854
{
1085710855
if (isMulOverflow)
1085810856
{
10857+
// Make sure that we have an internal register
10858+
assert(genCountBits(dst->gtRsvdRegs) == 2);
10859+
10860+
// There will be two bits set in tmpRegsMask.
10861+
// Remove the bit for 'dst->gtRegNum' from 'tmpRegsMask'
10862+
regMaskTP tmpRegsMask = dst->gtRsvdRegs & ~genRegMask(dst->gtRegNum);
10863+
assert(tmpRegsMask != RBM_NONE);
10864+
regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask
10865+
extraReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask
10866+
1085910867
if (isUnsignedMul)
1086010868
{
10861-
assert(genCountBits(dst->gtRsvdRegs) == 1);
10862-
extraReg = genRegNumFromMask(dst->gtRsvdRegs);
10869+
if (attr == EA_4BYTE)
10870+
{
10871+
// Compute 8 byte results from 4 byte by 4 byte multiplication.
10872+
emitIns_R_R_R(INS_umull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
1086310873

10864-
// Compute the high result
10865-
emitIns_R_R_R(ins2, attr, extraReg, src1->gtRegNum, src2->gtRegNum);
10874+
// Get the high result by shifting dst.
10875+
emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32);
10876+
}
10877+
else
10878+
{
10879+
assert(attr == EA_8BYTE);
10880+
// Compute the high result.
10881+
emitIns_R_R_R(INS_umulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum);
1086610882

10867-
emitIns_R_I(INS_cmp, EA_8BYTE, extraReg, 0);
10868-
codeGen->genCheckOverflow(dst);
10883+
// Now multiply without skewing the high result.
10884+
emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
10885+
}
1086910886

10870-
// Now multiply without skewing the high result if no overflow.
10871-
emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
10887+
// zero-sign bit comparision to detect overflow.
10888+
emitIns_R_I(INS_cmp, attr, extraReg, 0);
1087210889
}
1087310890
else
1087410891
{
10875-
// Make sure that we have an internal register
10876-
assert(genCountBits(dst->gtRsvdRegs) == 2);
10877-
10878-
// There will be two bits set in tmpRegsMask.
10879-
// Remove the bit for 'dst->gtRegNum' from 'tmpRegsMask'
10880-
regMaskTP tmpRegsMask = dst->gtRsvdRegs & ~genRegMask(dst->gtRegNum);
10881-
regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask
10882-
extraReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask
10892+
int bitShift = 0;
10893+
if (attr == EA_4BYTE)
10894+
{
10895+
// Compute 8 byte results from 4 byte by 4 byte multiplication.
10896+
emitIns_R_R_R(INS_smull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
1088310897

10884-
// Make sure the two registers are not the same.
10885-
assert(extraReg != dst->gtRegNum);
10898+
// Get the high result by shifting dst.
10899+
emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32);
1088610900

10887-
// Save the high result in a temporary register
10888-
emitIns_R_R_R(ins2, attr, extraReg, src1->gtRegNum, src2->gtRegNum);
10901+
bitShift = 31;
10902+
}
10903+
else
10904+
{
10905+
assert(attr == EA_8BYTE);
10906+
// Save the high result in a temporary register.
10907+
emitIns_R_R_R(INS_smulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum);
1088910908

10890-
// Now multiply without skewing the high result.
10891-
emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
10909+
// Now multiply without skewing the high result.
10910+
emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
1089210911

10893-
emitIns_R_R_I(INS_cmp, EA_8BYTE, extraReg, dst->gtRegNum, 63, INS_OPTS_ASR);
10912+
bitShift = 63;
10913+
}
1089410914

10895-
codeGen->genCheckOverflow(dst);
10915+
// Sign bit comparision to detect overflow.
10916+
emitIns_R_R_I(INS_cmp, attr, extraReg, dst->gtRegNum, bitShift, INS_OPTS_ASR);
1089610917
}
1089710918
}
1089810919
else
@@ -10902,7 +10923,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
1090210923
}
1090310924
}
1090410925

10905-
if (dst->gtOverflowEx() && !isMulOverflow)
10926+
if (dst->gtOverflowEx())
1090610927
{
1090710928
assert(!varTypeIsFloating(dst));
1090810929
codeGen->genCheckOverflow(dst);

src/jit/lowerarm64.cpp

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -347,12 +347,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
347347
break;
348348

349349
case GT_MUL:
350-
if ((tree->gtFlags & GTF_UNSIGNED) != 0)
351-
{
352-
// unsigned mul should only need one register
353-
info->internalIntCount = 1;
354-
}
355-
else if (tree->gtOverflow())
350+
if (tree->gtOverflow())
356351
{
357352
// Need a register different from target reg to check
358353
// for signed overflow.

tests/arm64/Tests.lst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5247,7 +5247,7 @@ WorkingDir=JIT\Regression\CLR-x86-JIT\V1-M09.5-PDC\b32093\b32093
52475247
MaxAllowedDurationSeconds=600
52485248
HostStyle=Any
52495249
Expected=100
5250-
Categories=JIT;EXPECTED_FAIL;NEED_TRIAGE
5250+
Categories=JIT;EXPECTED_PASS
52515251
[lclfldrem_cs_ro.exe_2875]
52525252
RelativePath=JIT\Directed\coverage\oldtests\lclfldrem_cs_ro\lclfldrem_cs_ro.exe
52535253
WorkingDir=JIT\Directed\coverage\oldtests\lclfldrem_cs_ro

0 commit comments

Comments
 (0)