Skip to content

Commit 2f40d1f

Browse files
committed
ARM64: Fix for Multiplication with Overflow Check
For 4 byte integer multiplication, JIT emits a bad-code which is valid only for 8 byte (i8) multiplication. For the fix, I use ```smull```(signed)/```umull```(unsigned) instructions that contain 8 byte results from 4 byte by 4 byte multiplication. So only one multiplication is needed instead of two for this case. By simply shifting the results, we could get the upper results that is used to detect overflow. Similar transform is made for the unsigned case. Lower is also changed to reserve a register for overflow check. Before smulh w10, w8, w9 --> Incorrect use: smulh is for obtaining the upper bits [127:64] of x8 * x9 mul w8, w8, w9 cmp x10, x8, ASR dotnet#63 After smull x8, x8, x9 --> x8 = w8 * w9 lsr x10, x8, dotnet#32 --> shift the upper bit of x8 to get sign bit cmp w10, w8, ASR #31 --> check sign bit
1 parent 9c84847 commit 2f40d1f

File tree

3 files changed

+52
-34
lines changed

3 files changed

+52
-34
lines changed

src/jit/emitarm64.cpp

Lines changed: 50 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -10824,7 +10824,6 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
1082410824
}
1082510825
bool isMulOverflow = false;
1082610826
bool isUnsignedMul = false;
10827-
instruction ins2 = INS_invalid;
1082810827
regNumber extraReg = REG_NA;
1082910828
if (dst->gtOverflowEx())
1083010829
{
@@ -10840,7 +10839,6 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
1084010839
{
1084110840
isMulOverflow = true;
1084210841
isUnsignedMul = ((dst->gtFlags & GTF_UNSIGNED) != 0);
10843-
ins2 = isUnsignedMul ? INS_umulh : INS_smulh;
1084410842
assert(intConst == nullptr); // overflow format doesn't support an int constant operand
1084510843
}
1084610844
else
@@ -10856,43 +10854,68 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
1085610854
{
1085710855
if (isMulOverflow)
1085810856
{
10857+
// Make sure that we have an internal register
10858+
assert(genCountBits(dst->gtRsvdRegs) == 2);
10859+
10860+
// There will be two bits set in tmpRegsMask.
10861+
// Remove the bit for 'dst->gtRegNum' from 'tmpRegsMask'
10862+
regMaskTP tmpRegsMask = dst->gtRsvdRegs & ~genRegMask(dst->gtRegNum);
10863+
regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask
10864+
extraReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask
10865+
10866+
// Make sure the two registers are not the same.
10867+
assert(extraReg != dst->gtRegNum);
10868+
1085910869
if (isUnsignedMul)
1086010870
{
10861-
assert(genCountBits(dst->gtRsvdRegs) == 1);
10862-
extraReg = genRegNumFromMask(dst->gtRsvdRegs);
10871+
if (attr == EA_4BYTE)
10872+
{
10873+
// Compute 8 byte results from 4 byte by 4 byte multiplication.
10874+
emitIns_R_R_R(INS_umull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
1086310875

10864-
// Compute the high result
10865-
emitIns_R_R_R(ins2, attr, extraReg, src1->gtRegNum, src2->gtRegNum);
10876+
// Get the high result by shifting dst.
10877+
emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32);
10878+
}
10879+
else
10880+
{
10881+
assert(attr == EA_8BYTE);
10882+
// Compute the high result.
10883+
emitIns_R_R_R(INS_umulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum);
1086610884

10867-
emitIns_R_I(INS_cmp, EA_8BYTE, extraReg, 0);
10868-
codeGen->genCheckOverflow(dst);
10885+
// Now multiply without skewing the high result.
10886+
emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
10887+
}
1086910888

10870-
// Now multiply without skewing the high result if no overflow.
10871-
emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
10889+
// zero-sign bit comparision to detect overflow.
10890+
emitIns_R_I(INS_cmp, attr, extraReg, 0);
1087210891
}
1087310892
else
1087410893
{
10875-
// Make sure that we have an internal register
10876-
assert(genCountBits(dst->gtRsvdRegs) == 2);
10877-
10878-
// There will be two bits set in tmpRegsMask.
10879-
// Remove the bit for 'dst->gtRegNum' from 'tmpRegsMask'
10880-
regMaskTP tmpRegsMask = dst->gtRsvdRegs & ~genRegMask(dst->gtRegNum);
10881-
regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask
10882-
extraReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask
10894+
int bitShift = 0;
10895+
if (attr == EA_4BYTE)
10896+
{
10897+
// Compute 8 byte results from 4 byte by 4 byte multiplication.
10898+
emitIns_R_R_R(INS_smull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
1088310899

10884-
// Make sure the two registers are not the same.
10885-
assert(extraReg != dst->gtRegNum);
10900+
// Get the high result by shifting dst.
10901+
emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32);
1088610902

10887-
// Save the high result in a temporary register
10888-
emitIns_R_R_R(ins2, attr, extraReg, src1->gtRegNum, src2->gtRegNum);
10903+
bitShift = 31;
10904+
}
10905+
else
10906+
{
10907+
assert(attr == EA_8BYTE);
10908+
// Save the high result in a temporary register.
10909+
emitIns_R_R_R(INS_smulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum);
1088910910

10890-
// Now multiply without skewing the high result.
10891-
emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
10911+
// Now multiply without skewing the high result.
10912+
emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
1089210913

10893-
emitIns_R_R_I(INS_cmp, EA_8BYTE, extraReg, dst->gtRegNum, 63, INS_OPTS_ASR);
10914+
bitShift = 63;
10915+
}
1089410916

10895-
codeGen->genCheckOverflow(dst);
10917+
// Sign bit comparision to detect overflow.
10918+
emitIns_R_R_I(INS_cmp, attr, extraReg, dst->gtRegNum, bitShift, INS_OPTS_ASR);
1089610919
}
1089710920
}
1089810921
else
@@ -10902,7 +10925,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
1090210925
}
1090310926
}
1090410927

10905-
if (dst->gtOverflowEx() && !isMulOverflow)
10928+
if (dst->gtOverflowEx())
1090610929
{
1090710930
assert(!varTypeIsFloating(dst));
1090810931
codeGen->genCheckOverflow(dst);

src/jit/lowerarm64.cpp

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -347,12 +347,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
347347
break;
348348

349349
case GT_MUL:
350-
if ((tree->gtFlags & GTF_UNSIGNED) != 0)
351-
{
352-
// unsigned mul should only need one register
353-
info->internalIntCount = 1;
354-
}
355-
else if (tree->gtOverflow())
350+
if (tree->gtOverflow())
356351
{
357352
// Need a register different from target reg to check
358353
// for signed overflow.

tests/arm64/Tests.lst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5247,7 +5247,7 @@ WorkingDir=JIT\Regression\CLR-x86-JIT\V1-M09.5-PDC\b32093\b32093
52475247
MaxAllowedDurationSeconds=600
52485248
HostStyle=Any
52495249
Expected=100
5250-
Categories=JIT;EXPECTED_FAIL;NEED_TRIAGE
5250+
Categories=JIT;EXPECTED_PASS
52515251
[lclfldrem_cs_ro.exe_2875]
52525252
RelativePath=JIT\Directed\coverage\oldtests\lclfldrem_cs_ro\lclfldrem_cs_ro.exe
52535253
WorkingDir=JIT\Directed\coverage\oldtests\lclfldrem_cs_ro

0 commit comments

Comments
 (0)