Skip to content

Commit 51a0951

Browse files
authored
[InstCombine][X86] Only demand used bits for PSHUFB mask values (#106377)
(V)PSHUFB only uses the sign bit (for zeroing) and the lower 4 bits (to index per-lane byte 0-15), so use SimplifyDemandedBits to ignore anything touching the remaining bits. Fixes #106256.
1 parent 8a50e35 commit 51a0951

File tree

3 files changed

+70
-1
lines changed

3 files changed

+70
-1
lines changed

llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2950,11 +2950,16 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
29502950

29512951
case Intrinsic::x86_ssse3_pshuf_b_128:
29522952
case Intrinsic::x86_avx2_pshuf_b:
2953-
case Intrinsic::x86_avx512_pshuf_b_512:
2953+
case Intrinsic::x86_avx512_pshuf_b_512: {
29542954
if (Value *V = simplifyX86pshufb(II, IC.Builder)) {
29552955
return IC.replaceInstUsesWith(II, V);
29562956
}
2957+
2958+
KnownBits KnownMask(8);
2959+
if (IC.SimplifyDemandedBits(&II, 1, APInt(8, 0b10001111), KnownMask))
2960+
return &II;
29572961
break;
2962+
}
29582963

29592964
case Intrinsic::x86_avx_vpermilvar_ps:
29602965
case Intrinsic::x86_avx_vpermilvar_ps_256:

llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,38 @@ define <64 x i8> @fold_with_allpoison_elts_avx512(<64 x i8> %InVec) {
468468
ret <64 x i8> %1
469469
}
470470

471+
; Demanded bits tests (PR106256)
472+
473+
define <16 x i8> @demanded_bits_mask(<16 x i8> %InVec, <16 x i8> %InMask) {
474+
; CHECK-LABEL: @demanded_bits_mask(
475+
; CHECK-NEXT: [[S:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> [[INVEC:%.*]], <16 x i8> [[INMASK:%.*]])
476+
; CHECK-NEXT: ret <16 x i8> [[S]]
477+
;
478+
%m = or <16 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
479+
%s = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> %m)
480+
ret <16 x i8> %s
481+
}
482+
483+
define <32 x i8> @demanded_bits_mask_avx2(<32 x i8> %InVec, <32 x i8> %InMask) {
484+
; CHECK-LABEL: @demanded_bits_mask_avx2(
485+
; CHECK-NEXT: [[S:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[INVEC:%.*]], <32 x i8> [[INMASK:%.*]])
486+
; CHECK-NEXT: ret <32 x i8> [[S]]
487+
;
488+
%m = or <32 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
489+
%s = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> %m)
490+
ret <32 x i8> %s
491+
}
492+
493+
define <64 x i8> @demanded_bits_mask_avx512(<64 x i8> %InVec, <64 x i8> %InMask) {
494+
; CHECK-LABEL: @demanded_bits_mask_avx512(
495+
; CHECK-NEXT: [[S:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[INVEC:%.*]], <64 x i8> [[INMASK:%.*]])
496+
; CHECK-NEXT: ret <64 x i8> [[S]]
497+
;
498+
%m = or <64 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
499+
%s = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> %m)
500+
ret <64 x i8> %s
501+
}
502+
471503
; Demanded elts tests.
472504

473505
define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask, i8 %M0, i8 %M15) {

llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,38 @@ define <64 x i8> @fold_with_allundef_elts_avx512(<64 x i8> %InVec) {
468468
ret <64 x i8> %1
469469
}
470470

471+
; Demanded bits tests (PR106256)
472+
473+
define <16 x i8> @demanded_bits_mask(<16 x i8> %InVec, <16 x i8> %InMask) {
474+
; CHECK-LABEL: @demanded_bits_mask(
475+
; CHECK-NEXT: [[S:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> [[INVEC:%.*]], <16 x i8> [[INMASK:%.*]])
476+
; CHECK-NEXT: ret <16 x i8> [[S]]
477+
;
478+
%m = or <16 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
479+
%s = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> %m)
480+
ret <16 x i8> %s
481+
}
482+
483+
define <32 x i8> @demanded_bits_mask_avx2(<32 x i8> %InVec, <32 x i8> %InMask) {
484+
; CHECK-LABEL: @demanded_bits_mask_avx2(
485+
; CHECK-NEXT: [[S:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[INVEC:%.*]], <32 x i8> [[INMASK:%.*]])
486+
; CHECK-NEXT: ret <32 x i8> [[S]]
487+
;
488+
%m = or <32 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
489+
%s = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> %m)
490+
ret <32 x i8> %s
491+
}
492+
493+
define <64 x i8> @demanded_bits_mask_avx512(<64 x i8> %InVec, <64 x i8> %InMask) {
494+
; CHECK-LABEL: @demanded_bits_mask_avx512(
495+
; CHECK-NEXT: [[S:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[INVEC:%.*]], <64 x i8> [[INMASK:%.*]])
496+
; CHECK-NEXT: ret <64 x i8> [[S]]
497+
;
498+
%m = or <64 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
499+
%s = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> %m)
500+
ret <64 x i8> %s
501+
}
502+
471503
; Demanded elts tests.
472504

473505
define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask, i8 %M0, i8 %M15) {

0 commit comments

Comments
 (0)