Skip to content

Commit a46a2c2

Browse files
authored
[X86] Lower vXi8 multiplies using PMADDUBSW on SSSE3+ targets (#95690)
Extends #95403 to handle non-constant cases - we can avoid unpacks/extensions from vXi8 to vXi16 by using PMADDUBSW instead and truncating the vXi16 results back together. Most targets benefit from performing this for non-constant cases - its just Intel Core/SandyBridge era CPUs that might experience additional Port0/15 contention (but lower instruction count). Fixes #90748
1 parent 9acb533 commit a46a2c2

18 files changed

+953
-947
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28503,17 +28503,19 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
2850328503

2850428504
MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
2850528505

28506-
// For vXi8 mul-by-constant, try PMADDUBSW to avoid the need for extension.
28506+
// For vXi8 mul, try PMADDUBSW to avoid the need for extension.
2850728507
// Don't do this if we only need to unpack one half.
28508-
if (Subtarget.hasSSSE3() &&
28509-
ISD::isBuildVectorOfConstantSDNodes(B.getNode())) {
28510-
bool IsLoLaneAllZeroOrUndef = true;
28511-
bool IsHiLaneAllZeroOrUndef = true;
28512-
for (auto [Idx, Val] : enumerate(B->ops())) {
28513-
if ((Idx % NumEltsPerLane) >= (NumEltsPerLane / 2))
28514-
IsHiLaneAllZeroOrUndef &= isNullConstantOrUndef(Val);
28515-
else
28516-
IsLoLaneAllZeroOrUndef &= isNullConstantOrUndef(Val);
28508+
if (Subtarget.hasSSSE3()) {
28509+
bool BIsBuildVector = isa<BuildVectorSDNode>(B);
28510+
bool IsLoLaneAllZeroOrUndef = BIsBuildVector;
28511+
bool IsHiLaneAllZeroOrUndef = BIsBuildVector;
28512+
if (BIsBuildVector) {
28513+
for (auto [Idx, Val] : enumerate(B->ops())) {
28514+
if ((Idx % NumEltsPerLane) >= (NumEltsPerLane / 2))
28515+
IsHiLaneAllZeroOrUndef &= isNullConstantOrUndef(Val);
28516+
else
28517+
IsLoLaneAllZeroOrUndef &= isNullConstantOrUndef(Val);
28518+
}
2851728519
}
2851828520
if (!(IsLoLaneAllZeroOrUndef || IsHiLaneAllZeroOrUndef)) {
2851928521
SDValue Mask = DAG.getBitcast(VT, DAG.getConstant(0x00FF, dl, ExVT));

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -852,8 +852,8 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
852852
{ ISD::SUB, MVT::v32i16, { 1, 1, 1, 1 } }, // psubw
853853

854854
{ ISD::MUL, MVT::v16i8, { 4, 12, 4, 5 } }, // extend/pmullw/trunc
855-
{ ISD::MUL, MVT::v32i8, { 6, 11,10,11 } }, // extend/pmullw/trunc
856-
{ ISD::MUL, MVT::v64i8, { 6, 12,10,11 } }, // unpack/pmullw
855+
{ ISD::MUL, MVT::v32i8, { 3, 10, 7,10 } }, // pmaddubsw
856+
{ ISD::MUL, MVT::v64i8, { 3, 11, 7,10 } }, // pmaddubsw
857857
{ ISD::MUL, MVT::v32i16, { 1, 5, 1, 1 } }, // pmullw
858858

859859
{ ISD::SUB, MVT::v32i8, { 1, 1, 1, 1 } }, // psubb
@@ -1119,7 +1119,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
11191119
{ ISD::ADD, MVT::v4i64, { 1, 1, 1, 2 } }, // paddq
11201120

11211121
{ ISD::MUL, MVT::v16i8, { 5, 18, 6,12 } }, // extend/pmullw/pack
1122-
{ ISD::MUL, MVT::v32i8, { 6, 11,10,20 } }, // unpack/pmullw
1122+
{ ISD::MUL, MVT::v32i8, { 4, 8, 8,16 } }, // pmaddubsw
11231123
{ ISD::MUL, MVT::v16i16, { 2, 5, 1, 2 } }, // pmullw
11241124
{ ISD::MUL, MVT::v8i32, { 4, 10, 1, 2 } }, // pmulld
11251125
{ ISD::MUL, MVT::v4i32, { 2, 10, 1, 2 } }, // pmulld
@@ -1170,8 +1170,8 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
11701170
// We don't have to scalarize unsupported ops. We can issue two half-sized
11711171
// operations and we only need to extract the upper YMM half.
11721172
// Two ops + 1 extract + 1 insert = 4.
1173-
{ ISD::MUL, MVT::v32i8, { 12, 12, 22, 23 } }, // unpack/pmullw + split
1174-
{ ISD::MUL, MVT::v16i8, { 5, 6, 10, 12 } }, // unpack/pmullw
1173+
{ ISD::MUL, MVT::v32i8, { 10, 11, 18, 19 } }, // pmaddubsw + split
1174+
{ ISD::MUL, MVT::v16i8, { 5, 6, 8, 12 } }, // 2*pmaddubsw/3*and/psllw/or
11751175
{ ISD::MUL, MVT::v16i16, { 4, 8, 5, 6 } }, // pmullw + split
11761176
{ ISD::MUL, MVT::v8i32, { 5, 8, 5, 10 } }, // pmulld + split
11771177
{ ISD::MUL, MVT::v4i32, { 2, 5, 1, 3 } }, // pmulld
@@ -1311,7 +1311,6 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
13111311
{ ISD::SRA, MVT::v4i32, { 16, 17,15,19 } }, // Shift each lane + blend.
13121312
{ ISD::SRA, MVT::v2i64, { 8, 17, 5, 7 } }, // splat+shuffle sequence.
13131313

1314-
{ ISD::MUL, MVT::v16i8, { 6, 18,10,12 } }, // 2*unpack/2*pmullw/2*and/pack
13151314
{ ISD::MUL, MVT::v4i32, { 2, 11, 1, 1 } } // pmulld (Nehalem from agner.org)
13161315
};
13171316

@@ -1320,6 +1319,15 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
13201319
if (auto KindCost = Entry->Cost[CostKind])
13211320
return LT.first * *KindCost;
13221321

1322+
static const CostKindTblEntry SSSE3CostTable[] = {
1323+
{ ISD::MUL, MVT::v16i8, { 5, 18,10,12 } }, // 2*pmaddubsw/3*and/psllw/or
1324+
};
1325+
1326+
if (ST->hasSSSE3())
1327+
if (const auto *Entry = CostTableLookup(SSSE3CostTable, ISD, LT.second))
1328+
if (auto KindCost = Entry->Cost[CostKind])
1329+
return LT.first * *KindCost;
1330+
13231331
static const CostKindTblEntry SSE2CostTable[] = {
13241332
// We don't correctly identify costs of casts because they are marked as
13251333
// custom.

llvm/test/Analysis/CostModel/X86/arith-int-codesize.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -791,9 +791,9 @@ define i32 @mul(i32 %arg) {
791791
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = mul <2 x i8> undef, undef
792792
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
793793
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
794-
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
795-
; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = mul <32 x i8> undef, undef
796-
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = mul <64 x i8> undef, undef
794+
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = mul <16 x i8> undef, undef
795+
; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = mul <32 x i8> undef, undef
796+
; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = mul <64 x i8> undef, undef
797797
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
798798
;
799799
; SSE42-LABEL: 'mul'
@@ -835,9 +835,9 @@ define i32 @mul(i32 %arg) {
835835
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = mul <2 x i8> undef, undef
836836
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
837837
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
838-
; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = mul <16 x i8> undef, undef
839-
; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = mul <32 x i8> undef, undef
840-
; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = mul <64 x i8> undef, undef
838+
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = mul <16 x i8> undef, undef
839+
; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = mul <32 x i8> undef, undef
840+
; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = mul <64 x i8> undef, undef
841841
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
842842
;
843843
; AVX2-LABEL: 'mul'
@@ -858,8 +858,8 @@ define i32 @mul(i32 %arg) {
858858
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
859859
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
860860
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = mul <16 x i8> undef, undef
861-
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = mul <32 x i8> undef, undef
862-
; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = mul <64 x i8> undef, undef
861+
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = mul <32 x i8> undef, undef
862+
; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = mul <64 x i8> undef, undef
863863
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
864864
;
865865
; AVX512F-LABEL: 'mul'
@@ -880,7 +880,7 @@ define i32 @mul(i32 %arg) {
880880
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
881881
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
882882
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = mul <16 x i8> undef, undef
883-
; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = mul <32 x i8> undef, undef
883+
; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = mul <32 x i8> undef, undef
884884
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = mul <64 x i8> undef, undef
885885
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
886886
;
@@ -902,8 +902,8 @@ define i32 @mul(i32 %arg) {
902902
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
903903
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
904904
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = mul <16 x i8> undef, undef
905-
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = mul <32 x i8> undef, undef
906-
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = mul <64 x i8> undef, undef
905+
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = mul <32 x i8> undef, undef
906+
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = mul <64 x i8> undef, undef
907907
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
908908
;
909909
; AVX512DQ-LABEL: 'mul'
@@ -924,7 +924,7 @@ define i32 @mul(i32 %arg) {
924924
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
925925
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
926926
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = mul <16 x i8> undef, undef
927-
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = mul <32 x i8> undef, undef
927+
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = mul <32 x i8> undef, undef
928928
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = mul <64 x i8> undef, undef
929929
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
930930
;

llvm/test/Analysis/CostModel/X86/arith-int-latency.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -680,8 +680,8 @@ define i32 @mul(i32 %arg) {
680680
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = mul <4 x i8> undef, undef
681681
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = mul <8 x i8> undef, undef
682682
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = mul <16 x i8> undef, undef
683-
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = mul <32 x i8> undef, undef
684-
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = mul <64 x i8> undef, undef
683+
; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = mul <32 x i8> undef, undef
684+
; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64I8 = mul <64 x i8> undef, undef
685685
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
686686
;
687687
; AVX2-LABEL: 'mul'
@@ -702,8 +702,8 @@ define i32 @mul(i32 %arg) {
702702
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = mul <4 x i8> undef, undef
703703
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = mul <8 x i8> undef, undef
704704
; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = mul <16 x i8> undef, undef
705-
; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = mul <32 x i8> undef, undef
706-
; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64I8 = mul <64 x i8> undef, undef
705+
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = mul <32 x i8> undef, undef
706+
; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = mul <64 x i8> undef, undef
707707
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
708708
;
709709
; AVX512F-LABEL: 'mul'
@@ -724,7 +724,7 @@ define i32 @mul(i32 %arg) {
724724
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = mul <4 x i8> undef, undef
725725
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = mul <8 x i8> undef, undef
726726
; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = mul <16 x i8> undef, undef
727-
; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = mul <32 x i8> undef, undef
727+
; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = mul <32 x i8> undef, undef
728728
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = mul <64 x i8> undef, undef
729729
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
730730
;
@@ -746,8 +746,8 @@ define i32 @mul(i32 %arg) {
746746
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = mul <4 x i8> undef, undef
747747
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = mul <8 x i8> undef, undef
748748
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
749-
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = mul <32 x i8> undef, undef
750-
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = mul <64 x i8> undef, undef
749+
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = mul <32 x i8> undef, undef
750+
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = mul <64 x i8> undef, undef
751751
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
752752
;
753753
; AVX512DQ-LABEL: 'mul'
@@ -768,7 +768,7 @@ define i32 @mul(i32 %arg) {
768768
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = mul <4 x i8> undef, undef
769769
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = mul <8 x i8> undef, undef
770770
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = mul <16 x i8> undef, undef
771-
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = mul <32 x i8> undef, undef
771+
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = mul <32 x i8> undef, undef
772772
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = mul <64 x i8> undef, undef
773773
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
774774
;

llvm/test/Analysis/CostModel/X86/arith-int-sizelatency.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -680,8 +680,8 @@ define i32 @mul(i32 %arg) {
680680
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
681681
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
682682
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
683-
; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = mul <32 x i8> undef, undef
684-
; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64I8 = mul <64 x i8> undef, undef
683+
; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = mul <32 x i8> undef, undef
684+
; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V64I8 = mul <64 x i8> undef, undef
685685
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
686686
;
687687
; AVX2-LABEL: 'mul'
@@ -702,8 +702,8 @@ define i32 @mul(i32 %arg) {
702702
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
703703
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
704704
; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
705-
; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = mul <32 x i8> undef, undef
706-
; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = mul <64 x i8> undef, undef
705+
; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = mul <32 x i8> undef, undef
706+
; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = mul <64 x i8> undef, undef
707707
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
708708
;
709709
; AVX512F-LABEL: 'mul'
@@ -724,7 +724,7 @@ define i32 @mul(i32 %arg) {
724724
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
725725
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
726726
; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
727-
; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = mul <32 x i8> undef, undef
727+
; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = mul <32 x i8> undef, undef
728728
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = mul <64 x i8> undef, undef
729729
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
730730
;
@@ -746,8 +746,8 @@ define i32 @mul(i32 %arg) {
746746
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
747747
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
748748
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = mul <16 x i8> undef, undef
749-
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = mul <32 x i8> undef, undef
750-
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = mul <64 x i8> undef, undef
749+
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = mul <32 x i8> undef, undef
750+
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = mul <64 x i8> undef, undef
751751
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
752752
;
753753
; AVX512DQ-LABEL: 'mul'
@@ -768,7 +768,7 @@ define i32 @mul(i32 %arg) {
768768
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
769769
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
770770
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
771-
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = mul <32 x i8> undef, undef
771+
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = mul <32 x i8> undef, undef
772772
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = mul <64 x i8> undef, undef
773773
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
774774
;

0 commit comments

Comments
 (0)