Skip to content

Commit 3d862c7

Browse files
committed
[X86] combineMul - use computeKnownBits directly to find MUL_IMM constant splat. (REAPPLIED)
As we're after a constant splat value, we can avoid all the complexities of trying to recreate the correct constant via getTargetConstantFromNode.
1 parent d148548 commit 3d862c7

File tree

4 files changed

+130
-96
lines changed

4 files changed

+130
-96
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -48492,26 +48492,15 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
4849248492
(!VT.isVector() || !VT.isSimple() || !VT.isInteger()))
4849348493
return SDValue();
4849448494

48495-
ConstantSDNode *CNode = isConstOrConstSplat(
48496-
N->getOperand(1), /*AllowUndefs*/ true, /*AllowTrunc*/ false);
48497-
const APInt *C = nullptr;
48498-
if (!CNode) {
48499-
if (VT.isVector())
48500-
if (auto *RawC = getTargetConstantFromNode(N->getOperand(1)))
48501-
if (auto *SplatC = RawC->getSplatValue())
48502-
if (auto *SplatCI = dyn_cast<ConstantInt>(SplatC))
48503-
C = &(SplatCI->getValue());
48504-
48505-
if (!C || C->getBitWidth() != VT.getScalarSizeInBits())
48506-
return SDValue();
48507-
} else {
48508-
C = &(CNode->getAPIntValue());
48509-
}
48495+
KnownBits Known1 = DAG.computeKnownBits(N->getOperand(1));
48496+
if (!Known1.isConstant())
48497+
return SDValue();
4851048498

48511-
if (isPowerOf2_64(C->getZExtValue()))
48499+
const APInt &C = Known1.getConstant();
48500+
if (isPowerOf2_64(C.getZExtValue()) || C.isZero() || C.isAllOnes())
4851248501
return SDValue();
4851348502

48514-
int64_t SignMulAmt = C->getSExtValue();
48503+
int64_t SignMulAmt = C.getSExtValue();
4851548504
assert(SignMulAmt != INT64_MIN && "Int min should have been handled!");
4851648505
uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;
4851748506

@@ -48570,14 +48559,10 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
4857048559
if (SignMulAmt < 0)
4857148560
NewMul = DAG.getNegative(NewMul, DL, VT);
4857248561
} else if (!Subtarget.slowLEA())
48573-
NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL);
48562+
NewMul = combineMulSpecial(C.getZExtValue(), N, DAG, VT, DL);
4857448563
}
4857548564
if (!NewMul) {
4857648565
EVT ShiftVT = VT.isVector() ? VT : MVT::i8;
48577-
assert(C->getZExtValue() != 0 &&
48578-
C->getZExtValue() != maxUIntN(VT.getScalarSizeInBits()) &&
48579-
"Both cases that could cause potential overflows should have "
48580-
"already been handled.");
4858148566
if (isPowerOf2_64(AbsMulAmt - 1)) {
4858248567
// (mul x, 2^N + 1) => (add (shl x, N), x)
4858348568
NewMul = DAG.getNode(

llvm/test/CodeGen/X86/combine-add.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -265,8 +265,8 @@ define void @PR52039(ptr %pa, ptr %pb) {
265265
; AVX2: # %bb.0:
266266
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [10,10,10,10,10,10,10,10]
267267
; AVX2-NEXT: vpsubd (%rdi), %ymm0, %ymm0
268-
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3]
269-
; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm1
268+
; AVX2-NEXT: vpaddd %ymm0, %ymm0, %ymm1
269+
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm1
270270
; AVX2-NEXT: vmovdqu %ymm0, (%rsi)
271271
; AVX2-NEXT: vmovdqu %ymm1, (%rdi)
272272
; AVX2-NEXT: vzeroupper

llvm/test/CodeGen/X86/mul-constant-i64.ll

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1614,3 +1614,90 @@ define i64 @test_mul_spec(i64 %x) nounwind {
16141614
%mul3 = mul nsw i64 %add, %add2
16151615
ret i64 %mul3
16161616
}
1617+
1618+
define i64 @PR111325(i64 %a0, i1 %a1) {
1619+
; X86-LABEL: PR111325:
1620+
; X86: # %bb.0: # %entry
1621+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1622+
; X86-NEXT: andb $1, %cl
1623+
; X86-NEXT: xorl %eax, %eax
1624+
; X86-NEXT: xorl %edx, %edx
1625+
; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
1626+
; X86-NEXT: cmpb $1, %cl
1627+
; X86-NEXT: sbbl %eax, %eax
1628+
; X86-NEXT: orl %edx, %eax
1629+
; X86-NEXT: xorl %edx, %edx
1630+
; X86-NEXT: retl
1631+
;
1632+
; X86-NOOPT-LABEL: PR111325:
1633+
; X86-NOOPT: # %bb.0: # %entry
1634+
; X86-NOOPT-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1635+
; X86-NOOPT-NEXT: andb $1, %cl
1636+
; X86-NOOPT-NEXT: xorl %eax, %eax
1637+
; X86-NOOPT-NEXT: xorl %edx, %edx
1638+
; X86-NOOPT-NEXT: subl {{[0-9]+}}(%esp), %edx
1639+
; X86-NOOPT-NEXT: cmpb $1, %cl
1640+
; X86-NOOPT-NEXT: sbbl %eax, %eax
1641+
; X86-NOOPT-NEXT: orl %edx, %eax
1642+
; X86-NOOPT-NEXT: xorl %edx, %edx
1643+
; X86-NOOPT-NEXT: retl
1644+
;
1645+
; X64-HSW-LABEL: PR111325:
1646+
; X64-HSW: # %bb.0: # %entry
1647+
; X64-HSW-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
1648+
; X64-HSW-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
1649+
; X64-HSW-NEXT: imull %edi, %eax
1650+
; X64-HSW-NEXT: testb $1, %sil
1651+
; X64-HSW-NEXT: cmoveq %rcx, %rax
1652+
; X64-HSW-NEXT: retq
1653+
;
1654+
; X64-JAG-LABEL: PR111325:
1655+
; X64-JAG: # %bb.0: # %entry
1656+
; X64-JAG-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
1657+
; X64-JAG-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
1658+
; X64-JAG-NEXT: imull %edi, %eax
1659+
; X64-JAG-NEXT: testb $1, %sil
1660+
; X64-JAG-NEXT: cmoveq %rcx, %rax
1661+
; X64-JAG-NEXT: retq
1662+
;
1663+
; X64-SLM-LABEL: PR111325:
1664+
; X64-SLM: # %bb.0: # %entry
1665+
; X64-SLM-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
1666+
; X64-SLM-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
1667+
; X64-SLM-NEXT: imull %edi, %eax
1668+
; X64-SLM-NEXT: testb $1, %sil
1669+
; X64-SLM-NEXT: cmoveq %rcx, %rax
1670+
; X64-SLM-NEXT: retq
1671+
;
1672+
; X64-HSW-NOOPT-LABEL: PR111325:
1673+
; X64-HSW-NOOPT: # %bb.0: # %entry
1674+
; X64-HSW-NOOPT-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
1675+
; X64-HSW-NOOPT-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
1676+
; X64-HSW-NOOPT-NEXT: imull %edi, %eax
1677+
; X64-HSW-NOOPT-NEXT: testb $1, %sil
1678+
; X64-HSW-NOOPT-NEXT: cmoveq %rcx, %rax
1679+
; X64-HSW-NOOPT-NEXT: retq
1680+
;
1681+
; X64-JAG-NOOPT-LABEL: PR111325:
1682+
; X64-JAG-NOOPT: # %bb.0: # %entry
1683+
; X64-JAG-NOOPT-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
1684+
; X64-JAG-NOOPT-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
1685+
; X64-JAG-NOOPT-NEXT: imull %edi, %eax
1686+
; X64-JAG-NOOPT-NEXT: testb $1, %sil
1687+
; X64-JAG-NOOPT-NEXT: cmoveq %rcx, %rax
1688+
; X64-JAG-NOOPT-NEXT: retq
1689+
;
1690+
; X64-SLM-NOOPT-LABEL: PR111325:
1691+
; X64-SLM-NOOPT: # %bb.0: # %entry
1692+
; X64-SLM-NOOPT-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
1693+
; X64-SLM-NOOPT-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
1694+
; X64-SLM-NOOPT-NEXT: imull %edi, %eax
1695+
; X64-SLM-NOOPT-NEXT: testb $1, %sil
1696+
; X64-SLM-NOOPT-NEXT: cmoveq %rcx, %rax
1697+
; X64-SLM-NOOPT-NEXT: retq
1698+
entry:
1699+
%mul = mul i64 %a0, 4294967295
1700+
%mask = and i64 %mul, 4294967295
1701+
%sel = select i1 %a1, i64 %mask, i64 4294967295
1702+
ret i64 %sel
1703+
}

llvm/test/CodeGen/X86/vector-mul.ll

Lines changed: 34 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -335,22 +335,11 @@ define <4 x i32> @mul_v4i32_17(<4 x i32> %a0) nounwind {
335335
; SSE-NEXT: paddd %xmm1, %xmm0
336336
; SSE-NEXT: ret{{[l|q]}}
337337
;
338-
; X64-XOP-LABEL: mul_v4i32_17:
339-
; X64-XOP: # %bb.0:
340-
; X64-XOP-NEXT: vpslld $4, %xmm0, %xmm1
341-
; X64-XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm0
342-
; X64-XOP-NEXT: retq
343-
;
344-
; X64-AVX2-LABEL: mul_v4i32_17:
345-
; X64-AVX2: # %bb.0:
346-
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
347-
; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
348-
; X64-AVX2-NEXT: retq
349-
;
350-
; X64-AVX512DQ-LABEL: mul_v4i32_17:
351-
; X64-AVX512DQ: # %bb.0:
352-
; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
353-
; X64-AVX512DQ-NEXT: retq
338+
; X64-AVX-LABEL: mul_v4i32_17:
339+
; X64-AVX: # %bb.0:
340+
; X64-AVX-NEXT: vpslld $4, %xmm0, %xmm1
341+
; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
342+
; X64-AVX-NEXT: retq
354343
%1 = mul <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
355344
ret <4 x i32> %1
356345
}
@@ -471,13 +460,14 @@ define <8 x i32> @mul_v8i32_17(<8 x i32> %a0) nounwind {
471460
;
472461
; X64-AVX2-LABEL: mul_v8i32_17:
473462
; X64-AVX2: # %bb.0:
474-
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17]
475-
; X64-AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
463+
; X64-AVX2-NEXT: vpslld $4, %ymm0, %ymm1
464+
; X64-AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
476465
; X64-AVX2-NEXT: retq
477466
;
478467
; X64-AVX512DQ-LABEL: mul_v8i32_17:
479468
; X64-AVX512DQ: # %bb.0:
480-
; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
469+
; X64-AVX512DQ-NEXT: vpslld $4, %ymm0, %ymm1
470+
; X64-AVX512DQ-NEXT: vpaddd %ymm1, %ymm0, %ymm0
481471
; X64-AVX512DQ-NEXT: retq
482472
%1 = mul <8 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
483473
ret <8 x i32> %1
@@ -596,24 +586,13 @@ define <4 x i32> @mul_v4i32_neg33(<4 x i32> %a0) nounwind {
596586
; SSE-NEXT: psubd %xmm1, %xmm0
597587
; SSE-NEXT: ret{{[l|q]}}
598588
;
599-
; X64-XOP-LABEL: mul_v4i32_neg33:
600-
; X64-XOP: # %bb.0:
601-
; X64-XOP-NEXT: vpslld $5, %xmm0, %xmm1
602-
; X64-XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm0
603-
; X64-XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1
604-
; X64-XOP-NEXT: vpsubd %xmm0, %xmm1, %xmm0
605-
; X64-XOP-NEXT: retq
606-
;
607-
; X64-AVX2-LABEL: mul_v4i32_neg33:
608-
; X64-AVX2: # %bb.0:
609-
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967263,4294967263,4294967263,4294967263]
610-
; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
611-
; X64-AVX2-NEXT: retq
612-
;
613-
; X64-AVX512DQ-LABEL: mul_v4i32_neg33:
614-
; X64-AVX512DQ: # %bb.0:
615-
; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
616-
; X64-AVX512DQ-NEXT: retq
589+
; X64-AVX-LABEL: mul_v4i32_neg33:
590+
; X64-AVX: # %bb.0:
591+
; X64-AVX-NEXT: vpslld $5, %xmm0, %xmm1
592+
; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
593+
; X64-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
594+
; X64-AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
595+
; X64-AVX-NEXT: retq
617596
%1 = mul <4 x i32> %a0, <i32 -33, i32 -33, i32 -33, i32 -33>
618597
ret <4 x i32> %1
619598
}
@@ -768,13 +747,18 @@ define <8 x i32> @mul_v8i32_neg33(<8 x i32> %a0) nounwind {
768747
;
769748
; X64-AVX2-LABEL: mul_v8i32_neg33:
770749
; X64-AVX2: # %bb.0:
771-
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967263,4294967263,4294967263,4294967263,4294967263,4294967263,4294967263,4294967263]
772-
; X64-AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
750+
; X64-AVX2-NEXT: vpslld $5, %ymm0, %ymm1
751+
; X64-AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
752+
; X64-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
753+
; X64-AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm0
773754
; X64-AVX2-NEXT: retq
774755
;
775756
; X64-AVX512DQ-LABEL: mul_v8i32_neg33:
776757
; X64-AVX512DQ: # %bb.0:
777-
; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
758+
; X64-AVX512DQ-NEXT: vpslld $5, %ymm0, %ymm1
759+
; X64-AVX512DQ-NEXT: vpaddd %ymm1, %ymm0, %ymm0
760+
; X64-AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
761+
; X64-AVX512DQ-NEXT: vpsubd %ymm0, %ymm1, %ymm0
778762
; X64-AVX512DQ-NEXT: retq
779763
%1 = mul <8 x i32> %a0, <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33>
780764
ret <8 x i32> %1
@@ -1113,22 +1097,11 @@ define <4 x i32> @mul_v4i32_7(<4 x i32> %a0) nounwind {
11131097
; SSE-NEXT: movdqa %xmm1, %xmm0
11141098
; SSE-NEXT: ret{{[l|q]}}
11151099
;
1116-
; X64-XOP-LABEL: mul_v4i32_7:
1117-
; X64-XOP: # %bb.0:
1118-
; X64-XOP-NEXT: vpslld $3, %xmm0, %xmm1
1119-
; X64-XOP-NEXT: vpsubd %xmm0, %xmm1, %xmm0
1120-
; X64-XOP-NEXT: retq
1121-
;
1122-
; X64-AVX2-LABEL: mul_v4i32_7:
1123-
; X64-AVX2: # %bb.0:
1124-
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
1125-
; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
1126-
; X64-AVX2-NEXT: retq
1127-
;
1128-
; X64-AVX512DQ-LABEL: mul_v4i32_7:
1129-
; X64-AVX512DQ: # %bb.0:
1130-
; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
1131-
; X64-AVX512DQ-NEXT: retq
1100+
; X64-AVX-LABEL: mul_v4i32_7:
1101+
; X64-AVX: # %bb.0:
1102+
; X64-AVX-NEXT: vpslld $3, %xmm0, %xmm1
1103+
; X64-AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
1104+
; X64-AVX-NEXT: retq
11321105
%1 = mul <4 x i32> %a0, <i32 7, i32 7, i32 7, i32 7>
11331106
ret <4 x i32> %1
11341107
}
@@ -1222,22 +1195,11 @@ define <4 x i32> @mul_v4i32_neg63(<4 x i32> %a0) nounwind {
12221195
; SSE-NEXT: psubd %xmm1, %xmm0
12231196
; SSE-NEXT: ret{{[l|q]}}
12241197
;
1225-
; X64-XOP-LABEL: mul_v4i32_neg63:
1226-
; X64-XOP: # %bb.0:
1227-
; X64-XOP-NEXT: vpslld $6, %xmm0, %xmm1
1228-
; X64-XOP-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1229-
; X64-XOP-NEXT: retq
1230-
;
1231-
; X64-AVX2-LABEL: mul_v4i32_neg63:
1232-
; X64-AVX2: # %bb.0:
1233-
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967233,4294967233,4294967233,4294967233]
1234-
; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
1235-
; X64-AVX2-NEXT: retq
1236-
;
1237-
; X64-AVX512DQ-LABEL: mul_v4i32_neg63:
1238-
; X64-AVX512DQ: # %bb.0:
1239-
; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
1240-
; X64-AVX512DQ-NEXT: retq
1198+
; X64-AVX-LABEL: mul_v4i32_neg63:
1199+
; X64-AVX: # %bb.0:
1200+
; X64-AVX-NEXT: vpslld $6, %xmm0, %xmm1
1201+
; X64-AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1202+
; X64-AVX-NEXT: retq
12411203
%1 = mul <4 x i32> %a0, <i32 -63, i32 -63, i32 -63, i32 -63>
12421204
ret <4 x i32> %1
12431205
}

0 commit comments

Comments
 (0)