Skip to content

Commit 1bc87c9

Browse files
committed
[x86] combineMul - use computeKnownBits directly to find MUL_IMM constant splat.
As we're after a constant splat value, we can avoid the complexity of trying to recreate the correct constant via getTargetConstantFromNode.
1 parent c36afb5 commit 1bc87c9

File tree

3 files changed

+45
-94
lines changed

3 files changed

+45
-94
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 9 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -48492,26 +48492,15 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
4849248492
(!VT.isVector() || !VT.isSimple() || !VT.isInteger()))
4849348493
return SDValue();
4849448494

48495-
ConstantSDNode *CNode = isConstOrConstSplat(
48496-
N->getOperand(1), /*AllowUndefs*/ true, /*AllowTrunc*/ false);
48497-
const APInt *C = nullptr;
48498-
if (!CNode) {
48499-
if (VT.isVector())
48500-
if (auto *RawC = getTargetConstantFromNode(N->getOperand(1)))
48501-
if (auto *SplatC = RawC->getSplatValue())
48502-
if (auto *SplatCI = dyn_cast<ConstantInt>(SplatC))
48503-
C = &(SplatCI->getValue());
48504-
48505-
if (!C || C->getBitWidth() != VT.getScalarSizeInBits())
48506-
return SDValue();
48507-
} else {
48508-
C = &(CNode->getAPIntValue());
48509-
}
48495+
KnownBits Known1 = DAG.computeKnownBits(N->getOperand(1));
48496+
if (!Known1.isConstant())
48497+
return SDValue();
4851048498

48511-
if (isPowerOf2_64(C->getZExtValue()))
48499+
const APInt &C = Known1.getConstant();
48500+
if (isPowerOf2_64(C.getZExtValue()))
4851248501
return SDValue();
4851348502

48514-
int64_t SignMulAmt = C->getSExtValue();
48503+
int64_t SignMulAmt = C.getSExtValue();
4851548504
assert(SignMulAmt != INT64_MIN && "Int min should have been handled!");
4851648505
uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;
4851748506

@@ -48570,12 +48559,12 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
4857048559
if (SignMulAmt < 0)
4857148560
NewMul = DAG.getNegative(NewMul, DL, VT);
4857248561
} else if (!Subtarget.slowLEA())
48573-
NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL);
48562+
NewMul = combineMulSpecial(C.getZExtValue(), N, DAG, VT, DL);
4857448563
}
4857548564
if (!NewMul) {
4857648565
EVT ShiftVT = VT.isVector() ? VT : MVT::i8;
48577-
assert(C->getZExtValue() != 0 &&
48578-
C->getZExtValue() != maxUIntN(VT.getScalarSizeInBits()) &&
48566+
assert(C.getZExtValue() != 0 &&
48567+
C.getZExtValue() != maxUIntN(VT.getScalarSizeInBits()) &&
4857948568
"Both cases that could cause potential overflows should have "
4858048569
"already been handled.");
4858148570
if (isPowerOf2_64(AbsMulAmt - 1)) {

llvm/test/CodeGen/X86/combine-add.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -265,8 +265,8 @@ define void @PR52039(ptr %pa, ptr %pb) {
265265
; AVX2: # %bb.0:
266266
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [10,10,10,10,10,10,10,10]
267267
; AVX2-NEXT: vpsubd (%rdi), %ymm0, %ymm0
268-
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3]
269-
; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm1
268+
; AVX2-NEXT: vpaddd %ymm0, %ymm0, %ymm1
269+
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm1
270270
; AVX2-NEXT: vmovdqu %ymm0, (%rsi)
271271
; AVX2-NEXT: vmovdqu %ymm1, (%rdi)
272272
; AVX2-NEXT: vzeroupper

llvm/test/CodeGen/X86/vector-mul.ll

Lines changed: 34 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -335,22 +335,11 @@ define <4 x i32> @mul_v4i32_17(<4 x i32> %a0) nounwind {
335335
; SSE-NEXT: paddd %xmm1, %xmm0
336336
; SSE-NEXT: ret{{[l|q]}}
337337
;
338-
; X64-XOP-LABEL: mul_v4i32_17:
339-
; X64-XOP: # %bb.0:
340-
; X64-XOP-NEXT: vpslld $4, %xmm0, %xmm1
341-
; X64-XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm0
342-
; X64-XOP-NEXT: retq
343-
;
344-
; X64-AVX2-LABEL: mul_v4i32_17:
345-
; X64-AVX2: # %bb.0:
346-
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
347-
; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
348-
; X64-AVX2-NEXT: retq
349-
;
350-
; X64-AVX512DQ-LABEL: mul_v4i32_17:
351-
; X64-AVX512DQ: # %bb.0:
352-
; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
353-
; X64-AVX512DQ-NEXT: retq
338+
; X64-AVX-LABEL: mul_v4i32_17:
339+
; X64-AVX: # %bb.0:
340+
; X64-AVX-NEXT: vpslld $4, %xmm0, %xmm1
341+
; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
342+
; X64-AVX-NEXT: retq
354343
%1 = mul <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
355344
ret <4 x i32> %1
356345
}
@@ -471,13 +460,14 @@ define <8 x i32> @mul_v8i32_17(<8 x i32> %a0) nounwind {
471460
;
472461
; X64-AVX2-LABEL: mul_v8i32_17:
473462
; X64-AVX2: # %bb.0:
474-
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17]
475-
; X64-AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
463+
; X64-AVX2-NEXT: vpslld $4, %ymm0, %ymm1
464+
; X64-AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
476465
; X64-AVX2-NEXT: retq
477466
;
478467
; X64-AVX512DQ-LABEL: mul_v8i32_17:
479468
; X64-AVX512DQ: # %bb.0:
480-
; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
469+
; X64-AVX512DQ-NEXT: vpslld $4, %ymm0, %ymm1
470+
; X64-AVX512DQ-NEXT: vpaddd %ymm1, %ymm0, %ymm0
481471
; X64-AVX512DQ-NEXT: retq
482472
%1 = mul <8 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
483473
ret <8 x i32> %1
@@ -596,24 +586,13 @@ define <4 x i32> @mul_v4i32_neg33(<4 x i32> %a0) nounwind {
596586
; SSE-NEXT: psubd %xmm1, %xmm0
597587
; SSE-NEXT: ret{{[l|q]}}
598588
;
599-
; X64-XOP-LABEL: mul_v4i32_neg33:
600-
; X64-XOP: # %bb.0:
601-
; X64-XOP-NEXT: vpslld $5, %xmm0, %xmm1
602-
; X64-XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm0
603-
; X64-XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1
604-
; X64-XOP-NEXT: vpsubd %xmm0, %xmm1, %xmm0
605-
; X64-XOP-NEXT: retq
606-
;
607-
; X64-AVX2-LABEL: mul_v4i32_neg33:
608-
; X64-AVX2: # %bb.0:
609-
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967263,4294967263,4294967263,4294967263]
610-
; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
611-
; X64-AVX2-NEXT: retq
612-
;
613-
; X64-AVX512DQ-LABEL: mul_v4i32_neg33:
614-
; X64-AVX512DQ: # %bb.0:
615-
; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
616-
; X64-AVX512DQ-NEXT: retq
589+
; X64-AVX-LABEL: mul_v4i32_neg33:
590+
; X64-AVX: # %bb.0:
591+
; X64-AVX-NEXT: vpslld $5, %xmm0, %xmm1
592+
; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
593+
; X64-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
594+
; X64-AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
595+
; X64-AVX-NEXT: retq
617596
%1 = mul <4 x i32> %a0, <i32 -33, i32 -33, i32 -33, i32 -33>
618597
ret <4 x i32> %1
619598
}
@@ -768,13 +747,18 @@ define <8 x i32> @mul_v8i32_neg33(<8 x i32> %a0) nounwind {
768747
;
769748
; X64-AVX2-LABEL: mul_v8i32_neg33:
770749
; X64-AVX2: # %bb.0:
771-
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967263,4294967263,4294967263,4294967263,4294967263,4294967263,4294967263,4294967263]
772-
; X64-AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
750+
; X64-AVX2-NEXT: vpslld $5, %ymm0, %ymm1
751+
; X64-AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
752+
; X64-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
753+
; X64-AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm0
773754
; X64-AVX2-NEXT: retq
774755
;
775756
; X64-AVX512DQ-LABEL: mul_v8i32_neg33:
776757
; X64-AVX512DQ: # %bb.0:
777-
; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
758+
; X64-AVX512DQ-NEXT: vpslld $5, %ymm0, %ymm1
759+
; X64-AVX512DQ-NEXT: vpaddd %ymm1, %ymm0, %ymm0
760+
; X64-AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
761+
; X64-AVX512DQ-NEXT: vpsubd %ymm0, %ymm1, %ymm0
778762
; X64-AVX512DQ-NEXT: retq
779763
%1 = mul <8 x i32> %a0, <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33>
780764
ret <8 x i32> %1
@@ -1113,22 +1097,11 @@ define <4 x i32> @mul_v4i32_7(<4 x i32> %a0) nounwind {
11131097
; SSE-NEXT: movdqa %xmm1, %xmm0
11141098
; SSE-NEXT: ret{{[l|q]}}
11151099
;
1116-
; X64-XOP-LABEL: mul_v4i32_7:
1117-
; X64-XOP: # %bb.0:
1118-
; X64-XOP-NEXT: vpslld $3, %xmm0, %xmm1
1119-
; X64-XOP-NEXT: vpsubd %xmm0, %xmm1, %xmm0
1120-
; X64-XOP-NEXT: retq
1121-
;
1122-
; X64-AVX2-LABEL: mul_v4i32_7:
1123-
; X64-AVX2: # %bb.0:
1124-
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
1125-
; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
1126-
; X64-AVX2-NEXT: retq
1127-
;
1128-
; X64-AVX512DQ-LABEL: mul_v4i32_7:
1129-
; X64-AVX512DQ: # %bb.0:
1130-
; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
1131-
; X64-AVX512DQ-NEXT: retq
1100+
; X64-AVX-LABEL: mul_v4i32_7:
1101+
; X64-AVX: # %bb.0:
1102+
; X64-AVX-NEXT: vpslld $3, %xmm0, %xmm1
1103+
; X64-AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
1104+
; X64-AVX-NEXT: retq
11321105
%1 = mul <4 x i32> %a0, <i32 7, i32 7, i32 7, i32 7>
11331106
ret <4 x i32> %1
11341107
}
@@ -1222,22 +1195,11 @@ define <4 x i32> @mul_v4i32_neg63(<4 x i32> %a0) nounwind {
12221195
; SSE-NEXT: psubd %xmm1, %xmm0
12231196
; SSE-NEXT: ret{{[l|q]}}
12241197
;
1225-
; X64-XOP-LABEL: mul_v4i32_neg63:
1226-
; X64-XOP: # %bb.0:
1227-
; X64-XOP-NEXT: vpslld $6, %xmm0, %xmm1
1228-
; X64-XOP-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1229-
; X64-XOP-NEXT: retq
1230-
;
1231-
; X64-AVX2-LABEL: mul_v4i32_neg63:
1232-
; X64-AVX2: # %bb.0:
1233-
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967233,4294967233,4294967233,4294967233]
1234-
; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
1235-
; X64-AVX2-NEXT: retq
1236-
;
1237-
; X64-AVX512DQ-LABEL: mul_v4i32_neg63:
1238-
; X64-AVX512DQ: # %bb.0:
1239-
; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
1240-
; X64-AVX512DQ-NEXT: retq
1198+
; X64-AVX-LABEL: mul_v4i32_neg63:
1199+
; X64-AVX: # %bb.0:
1200+
; X64-AVX-NEXT: vpslld $6, %xmm0, %xmm1
1201+
; X64-AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1202+
; X64-AVX-NEXT: retq
12411203
%1 = mul <4 x i32> %a0, <i32 -63, i32 -63, i32 -63, i32 -63>
12421204
ret <4 x i32> %1
12431205
}

0 commit comments

Comments (0)