Commit 74a98fd

Revert "[x86] combineMul - use computeKnownBits directly to find MUL_IMM constant splat."
The reverted change's rationale was:

> As we're after a constant splat value, we can avoid all the complexities of trying to recreate the correct constant via getTargetConstantFromNode.

However, it caused builds to fail with an assertion at X86ISelLowering.cpp:48569:

  Assertion `C.getZExtValue() != 0 &&
             C.getZExtValue() != maxUIntN(VT.getScalarSizeInBits()) &&
             "Both cases that could cause potential overflows should have "
             "already been handled."' failed.

See llvm#111325.

This reverts commit 1bc87c9.
1 parent: ae6af37
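
For context, a minimal self-contained model of the failure mode follows. The helpers below (knownConstant, combineMulSketch) are illustrative stand-ins, not the LLVM API, and the provably-zero operand is an assumed trigger rather than the actual reproducer from llvm#111325. The point: bit analysis such as DAG.computeKnownBits() can prove an operand constant even when it is not a literal constant or splat node, and since isPowerOf2_64(0) is false, a known-zero (or known-all-ones) value sails past the power-of-two bail-out and into the assert, which assumes such literals were folded by earlier combines.

#include <cassert>
#include <cstdint>
#include <optional>

// Stand-in for DAG.computeKnownBits(): bit analysis may prove a value
// constant even when the SDNode is not a literal constant/splat.
std::optional<uint64_t> knownConstant(bool provablyZero) {
  return provablyZero ? std::optional<uint64_t>(0) : std::nullopt;
}

// Same semantics as llvm::isPowerOf2_64: returns false for 0.
bool isPowerOf2_64(uint64_t v) { return v != 0 && (v & (v - 1)) == 0; }

void combineMulSketch(bool operandProvablyZero, unsigned scalarBits) {
  std::optional<uint64_t> C = knownConstant(operandProvablyZero);
  if (!C)
    return; // no constant found: bail, as the restored code also would
  if (isPowerOf2_64(*C))
    return; // the power-of-two bail-out does NOT filter C == 0 ...
  uint64_t maxUInt = ~0ULL >> (64 - scalarBits);
  // ... so a known-zero (or known-all-ones) operand reaches the assert:
  assert(*C != 0 && *C != maxUInt &&
         "Both cases that could cause potential overflows should have "
         "already been handled.");
}

int main() {
  combineMulSketch(/*operandProvablyZero=*/false, 32); // fine: bails early
  combineMulSketch(/*operandProvablyZero=*/true, 32);  // assertion fires
}

The restored isConstOrConstSplat / getTargetConstantFromNode path, by contrast, only ever yields literal constant splats, for which the 0 and -1 cases are presumably simplified before combineMul runs, keeping the assert's precondition intact.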

File tree: 3 files changed, +94 −45 lines

- llvm/lib/Target/X86/X86ISelLowering.cpp (+20 −9)
- llvm/test/CodeGen/X86/combine-add.ll (+2 −2)
- llvm/test/CodeGen/X86/vector-mul.ll (+72 −34)

llvm/lib/Target/X86/X86ISelLowering.cpp
Lines changed: 20 additions & 9 deletions

@@ -48492,15 +48492,26 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
       (!VT.isVector() || !VT.isSimple() || !VT.isInteger()))
     return SDValue();
 
-  KnownBits Known1 = DAG.computeKnownBits(N->getOperand(1));
-  if (!Known1.isConstant())
-    return SDValue();
+  ConstantSDNode *CNode = isConstOrConstSplat(
+      N->getOperand(1), /*AllowUndefs*/ true, /*AllowTrunc*/ false);
+  const APInt *C = nullptr;
+  if (!CNode) {
+    if (VT.isVector())
+      if (auto *RawC = getTargetConstantFromNode(N->getOperand(1)))
+        if (auto *SplatC = RawC->getSplatValue())
+          if (auto *SplatCI = dyn_cast<ConstantInt>(SplatC))
+            C = &(SplatCI->getValue());
+
+    if (!C || C->getBitWidth() != VT.getScalarSizeInBits())
+      return SDValue();
+  } else {
+    C = &(CNode->getAPIntValue());
+  }
 
-  const APInt &C = Known1.getConstant();
-  if (isPowerOf2_64(C.getZExtValue()))
+  if (isPowerOf2_64(C->getZExtValue()))
     return SDValue();
 
-  int64_t SignMulAmt = C.getSExtValue();
+  int64_t SignMulAmt = C->getSExtValue();
   assert(SignMulAmt != INT64_MIN && "Int min should have been handled!");
   uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;
 
@@ -48559,12 +48570,12 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
       if (SignMulAmt < 0)
         NewMul = DAG.getNegative(NewMul, DL, VT);
     } else if (!Subtarget.slowLEA())
-      NewMul = combineMulSpecial(C.getZExtValue(), N, DAG, VT, DL);
+      NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL);
   }
   if (!NewMul) {
     EVT ShiftVT = VT.isVector() ? VT : MVT::i8;
-    assert(C.getZExtValue() != 0 &&
-           C.getZExtValue() != maxUIntN(VT.getScalarSizeInBits()) &&
+    assert(C->getZExtValue() != 0 &&
+           C->getZExtValue() != maxUIntN(VT.getScalarSizeInBits()) &&
            "Both cases that could cause potential overflows should have "
            "already been handled.");
     if (isPowerOf2_64(AbsMulAmt - 1)) {
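
Note that in the restored vector fallback, a constant-pool splat is only accepted when its bit width matches VT.getScalarSizeInBits(), so truncated or widened splat constants still bail out before the multiply-amount logic runs.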

llvm/test/CodeGen/X86/combine-add.ll
Lines changed: 2 additions & 2 deletions

@@ -265,8 +265,8 @@ define void @PR52039(ptr %pa, ptr %pb) {
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [10,10,10,10,10,10,10,10]
 ; AVX2-NEXT: vpsubd (%rdi), %ymm0, %ymm0
-; AVX2-NEXT: vpaddd %ymm0, %ymm0, %ymm1
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3]
+; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm1
 ; AVX2-NEXT: vmovdqu %ymm0, (%rsi)
 ; AVX2-NEXT: vmovdqu %ymm1, (%rdi)
 ; AVX2-NEXT: vzeroupper
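
The two vpaddd instructions computed the multiply-by-3 as x + x + x; with the known-bits splat detection reverted, the broadcast constant operand is presumably no longer recognized as a MUL_IMM splat, so codegen falls back to an explicit vpmulld against a broadcast of 3.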

llvm/test/CodeGen/X86/vector-mul.ll
Lines changed: 72 additions & 34 deletions

@@ -335,11 +335,22 @@ define <4 x i32> @mul_v4i32_17(<4 x i32> %a0) nounwind {
 ; SSE-NEXT: paddd %xmm1, %xmm0
 ; SSE-NEXT: ret{{[l|q]}}
 ;
-; X64-AVX-LABEL: mul_v4i32_17:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpslld $4, %xmm0, %xmm1
-; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: retq
+; X64-XOP-LABEL: mul_v4i32_17:
+; X64-XOP: # %bb.0:
+; X64-XOP-NEXT: vpslld $4, %xmm0, %xmm1
+; X64-XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; X64-XOP-NEXT: retq
+;
+; X64-AVX2-LABEL: mul_v4i32_17:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
+; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512DQ-LABEL: mul_v4i32_17:
+; X64-AVX512DQ: # %bb.0:
+; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512DQ-NEXT: retq
 %1 = mul <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
 ret <4 x i32> %1
 }

@@ -460,14 +471,13 @@ define <8 x i32> @mul_v8i32_17(<8 x i32> %a0) nounwind {
 ;
 ; X64-AVX2-LABEL: mul_v8i32_17:
 ; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpslld $4, %ymm0, %ymm1
-; X64-AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17]
+; X64-AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
 ; X64-AVX2-NEXT: retq
 ;
 ; X64-AVX512DQ-LABEL: mul_v8i32_17:
 ; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vpslld $4, %ymm0, %ymm1
-; X64-AVX512DQ-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
 ; X64-AVX512DQ-NEXT: retq
 %1 = mul <8 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
 ret <8 x i32> %1

@@ -586,13 +596,24 @@ define <4 x i32> @mul_v4i32_neg33(<4 x i32> %a0) nounwind {
 ; SSE-NEXT: psubd %xmm1, %xmm0
 ; SSE-NEXT: ret{{[l|q]}}
 ;
-; X64-AVX-LABEL: mul_v4i32_neg33:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpslld $5, %xmm0, %xmm1
-; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X64-AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
-; X64-AVX-NEXT: retq
+; X64-XOP-LABEL: mul_v4i32_neg33:
+; X64-XOP: # %bb.0:
+; X64-XOP-NEXT: vpslld $5, %xmm0, %xmm1
+; X64-XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; X64-XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-XOP-NEXT: vpsubd %xmm0, %xmm1, %xmm0
+; X64-XOP-NEXT: retq
+;
+; X64-AVX2-LABEL: mul_v4i32_neg33:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967263,4294967263,4294967263,4294967263]
+; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512DQ-LABEL: mul_v4i32_neg33:
+; X64-AVX512DQ: # %bb.0:
+; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512DQ-NEXT: retq
 %1 = mul <4 x i32> %a0, <i32 -33, i32 -33, i32 -33, i32 -33>
 ret <4 x i32> %1
 }

@@ -747,18 +768,13 @@ define <8 x i32> @mul_v8i32_neg33(<8 x i32> %a0) nounwind {
 ;
 ; X64-AVX2-LABEL: mul_v8i32_neg33:
 ; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpslld $5, %ymm0, %ymm1
-; X64-AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X64-AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967263,4294967263,4294967263,4294967263,4294967263,4294967263,4294967263,4294967263]
+; X64-AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
 ; X64-AVX2-NEXT: retq
 ;
 ; X64-AVX512DQ-LABEL: mul_v8i32_neg33:
 ; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vpslld $5, %ymm0, %ymm1
-; X64-AVX512DQ-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; X64-AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X64-AVX512DQ-NEXT: vpsubd %ymm0, %ymm1, %ymm0
+; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
 ; X64-AVX512DQ-NEXT: retq
 %1 = mul <8 x i32> %a0, <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33>
 ret <8 x i32> %1

@@ -1097,11 +1113,22 @@ define <4 x i32> @mul_v4i32_7(<4 x i32> %a0) nounwind {
 ; SSE-NEXT: movdqa %xmm1, %xmm0
 ; SSE-NEXT: ret{{[l|q]}}
 ;
-; X64-AVX-LABEL: mul_v4i32_7:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpslld $3, %xmm0, %xmm1
-; X64-AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
-; X64-AVX-NEXT: retq
+; X64-XOP-LABEL: mul_v4i32_7:
+; X64-XOP: # %bb.0:
+; X64-XOP-NEXT: vpslld $3, %xmm0, %xmm1
+; X64-XOP-NEXT: vpsubd %xmm0, %xmm1, %xmm0
+; X64-XOP-NEXT: retq
+;
+; X64-AVX2-LABEL: mul_v4i32_7:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
+; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512DQ-LABEL: mul_v4i32_7:
+; X64-AVX512DQ: # %bb.0:
+; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512DQ-NEXT: retq
 %1 = mul <4 x i32> %a0, <i32 7, i32 7, i32 7, i32 7>
 ret <4 x i32> %1
 }

@@ -1195,11 +1222,22 @@ define <4 x i32> @mul_v4i32_neg63(<4 x i32> %a0) nounwind {
 ; SSE-NEXT: psubd %xmm1, %xmm0
 ; SSE-NEXT: ret{{[l|q]}}
 ;
-; X64-AVX-LABEL: mul_v4i32_neg63:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpslld $6, %xmm0, %xmm1
-; X64-AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: retq
+; X64-XOP-LABEL: mul_v4i32_neg63:
+; X64-XOP: # %bb.0:
+; X64-XOP-NEXT: vpslld $6, %xmm0, %xmm1
+; X64-XOP-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; X64-XOP-NEXT: retq
+;
+; X64-AVX2-LABEL: mul_v4i32_neg63:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967233,4294967233,4294967233,4294967233]
+; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512DQ-LABEL: mul_v4i32_neg63:
+; X64-AVX512DQ: # %bb.0:
+; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512DQ-NEXT: retq
 %1 = mul <4 x i32> %a0, <i32 -63, i32 -63, i32 -63, i32 -63>
 ret <4 x i32> %1
 }
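
Across these tests the pattern is the same: XOP keeps the shift-and-add/sub lowerings, which is why the shared X64-AVX check prefix splits into per-target X64-XOP, X64-AVX2, and X64-AVX512DQ blocks, while AVX2 and AVX512DQ revert to a plain vpmulld against a broadcast constant (a register broadcast on AVX2, an embedded {1toN} memory broadcast on AVX512DQ).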
