diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index 793d62ba2a8e7..7bd7f4bc24178 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -2950,11 +2950,16 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
 
   case Intrinsic::x86_ssse3_pshuf_b_128:
   case Intrinsic::x86_avx2_pshuf_b:
-  case Intrinsic::x86_avx512_pshuf_b_512:
+  case Intrinsic::x86_avx512_pshuf_b_512: {
     if (Value *V = simplifyX86pshufb(II, IC.Builder)) {
       return IC.replaceInstUsesWith(II, V);
     }
+
+    KnownBits KnownMask(8);
+    if (IC.SimplifyDemandedBits(&II, 1, APInt(8, 0b10001111), KnownMask))
+      return &II;
     break;
+  }
 
   case Intrinsic::x86_avx_vpermilvar_ps:
   case Intrinsic::x86_avx_vpermilvar_ps_256:
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll
index 2f301e9e9c107..f8cec9c152a08 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll
@@ -468,6 +468,38 @@ define <64 x i8> @fold_with_allpoison_elts_avx512(<64 x i8> %InVec) {
   ret <64 x i8> %1
 }
 
+; Demanded bits tests (PR106256)
+
+define <16 x i8> @demanded_bits_mask(<16 x i8> %InVec, <16 x i8> %InMask) {
+; CHECK-LABEL: @demanded_bits_mask(
+; CHECK-NEXT:    [[S:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> [[INVEC:%.*]], <16 x i8> [[INMASK:%.*]])
+; CHECK-NEXT:    ret <16 x i8> [[S]]
+;
+  %m = or <16 x i8> %InMask, <i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112> ; only sets bits 4-6, which pshufb ignores
+  %s = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> %m)
+  ret <16 x i8> %s
+}
+
+define <32 x i8> @demanded_bits_mask_avx2(<32 x i8> %InVec, <32 x i8> %InMask) {
+; CHECK-LABEL: @demanded_bits_mask_avx2(
+; CHECK-NEXT:    [[S:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[INVEC:%.*]], <32 x i8> [[INMASK:%.*]])
+; CHECK-NEXT:    ret <32 x i8> [[S]]
+;
+  %m = or <32 x i8> %InMask, <i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112>
+  %s = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> %m)
+  ret <32 x i8> %s
+}
+
+define <64 x i8> @demanded_bits_mask_avx512(<64 x i8> %InVec, <64 x i8> %InMask) {
+; CHECK-LABEL: @demanded_bits_mask_avx512(
+; CHECK-NEXT:    [[S:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[INVEC:%.*]], <64 x i8> [[INMASK:%.*]])
+; CHECK-NEXT:    ret <64 x i8> [[S]]
+;
+  %m = or <64 x i8> %InMask, <i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112>
+  %s = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> %m)
+  ret <64 x i8> %s
+}
+
 ; Demanded elts tests.
 
 define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask, i8 %M0, i8 %M15) {
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll b/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll
index cd90696eafac6..fd99fd880a809 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll
@@ -468,6 +468,38 @@ define <64 x i8> @fold_with_allundef_elts_avx512(<64 x i8> %InVec) {
   ret <64 x i8> %1
 }
 
+; Demanded bits tests (PR106256)
+
+define <16 x i8> @demanded_bits_mask(<16 x i8> %InVec, <16 x i8> %InMask) {
+; CHECK-LABEL: @demanded_bits_mask(
+; CHECK-NEXT:    [[S:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> [[INVEC:%.*]], <16 x i8> [[INMASK:%.*]])
+; CHECK-NEXT:    ret <16 x i8> [[S]]
+;
+  %m = or <16 x i8> %InMask, <i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112> ; only sets bits 4-6, which pshufb ignores
+  %s = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> %m)
+  ret <16 x i8> %s
+}
+
+define <32 x i8> @demanded_bits_mask_avx2(<32 x i8> %InVec, <32 x i8> %InMask) {
+; CHECK-LABEL: @demanded_bits_mask_avx2(
+; CHECK-NEXT:    [[S:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[INVEC:%.*]], <32 x i8> [[INMASK:%.*]])
+; CHECK-NEXT:    ret <32 x i8> [[S]]
+;
+  %m = or <32 x i8> %InMask, <i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112>
+  %s = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> %m)
+  ret <32 x i8> %s
+}
+
+define <64 x i8> @demanded_bits_mask_avx512(<64 x i8> %InVec, <64 x i8> %InMask) {
+; CHECK-LABEL: @demanded_bits_mask_avx512(
+; CHECK-NEXT:    [[S:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[INVEC:%.*]], <64 x i8> [[INMASK:%.*]])
+; CHECK-NEXT:    ret <64 x i8> [[S]]
+;
+  %m = or <64 x i8> %InMask, <i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112, i8 112>
+  %s = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> %m)
+  ret <64 x i8> %s
+}
+
 ; Demanded elts tests.
 
 define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask, i8 %M0, i8 %M15) {