Skip to content

Commit d57c046

Browse files
committed
[InstCombine][X86] Only demand used bits for VPERMILPD/VPERMILPS mask values
VPERMILPS uses the lower bits 0-1 of each mask element (to index per-lane i32/f32 elements 0-3); VPERMILPD uses bit 1 of each mask element (to index per-lane i64/f64 elements 0-1). Use SimplifyDemandedBits to ignore anything touching the remaining bits. Part of #106413.
1 parent 25c9410 commit d57c046

File tree

3 files changed

+29
-26
lines changed

3 files changed

+29
-26
lines changed

llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2963,14 +2963,29 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
29632963

29642964
case Intrinsic::x86_avx_vpermilvar_ps:
29652965
case Intrinsic::x86_avx_vpermilvar_ps_256:
2966-
case Intrinsic::x86_avx512_vpermilvar_ps_512:
2966+
case Intrinsic::x86_avx512_vpermilvar_ps_512: {
2967+
if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
2968+
return IC.replaceInstUsesWith(II, V);
2969+
}
2970+
2971+
KnownBits KnownMask(32);
2972+
if (IC.SimplifyDemandedBits(&II, 1, APInt(32, 0b00011), KnownMask))
2973+
return &II;
2974+
break;
2975+
}
2976+
29672977
case Intrinsic::x86_avx_vpermilvar_pd:
29682978
case Intrinsic::x86_avx_vpermilvar_pd_256:
2969-
case Intrinsic::x86_avx512_vpermilvar_pd_512:
2979+
case Intrinsic::x86_avx512_vpermilvar_pd_512: {
29702980
if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
29712981
return IC.replaceInstUsesWith(II, V);
29722982
}
2983+
2984+
KnownBits KnownMask(64);
2985+
if (IC.SimplifyDemandedBits(&II, 1, APInt(64, 0b00010), KnownMask))
2986+
return &II;
29732987
break;
2988+
}
29742989

29752990
case Intrinsic::x86_avx2_permd:
29762991
case Intrinsic::x86_avx2_permps:

llvm/test/Transforms/InstCombine/X86/x86-vpermil-inseltpoison.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -225,8 +225,7 @@ define <8 x double> @poison_test_vpermilvar_pd_512(<8 x double> %v) {
225225

226226
define <4 x float> @bits_test_vpermilvar_ps(<4 x float> %InVec, <4 x i32> %InMask) {
227227
; CHECK-LABEL: @bits_test_vpermilvar_ps(
228-
; CHECK-NEXT: [[M:%.*]] = or <4 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4>
229-
; CHECK-NEXT: [[S:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[INVEC:%.*]], <4 x i32> [[M]])
228+
; CHECK-NEXT: [[S:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[INVEC:%.*]], <4 x i32> [[INMASK:%.*]])
230229
; CHECK-NEXT: ret <4 x float> [[S]]
231230
;
232231
%m = or <4 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4>
@@ -236,8 +235,7 @@ define <4 x float> @bits_test_vpermilvar_ps(<4 x float> %InVec, <4 x i32> %InMas
236235

237236
define <8 x float> @bits_test_vpermilvar_ps_256(<8 x float> %InVec, <8 x i32> %InMask) {
238237
; CHECK-LABEL: @bits_test_vpermilvar_ps_256(
239-
; CHECK-NEXT: [[M:%.*]] = or <8 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4>
240-
; CHECK-NEXT: [[S:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[INVEC:%.*]], <8 x i32> [[M]])
238+
; CHECK-NEXT: [[S:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[INVEC:%.*]], <8 x i32> [[INMASK:%.*]])
241239
; CHECK-NEXT: ret <8 x float> [[S]]
242240
;
243241
%m = or <8 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4>
@@ -247,8 +245,7 @@ define <8 x float> @bits_test_vpermilvar_ps_256(<8 x float> %InVec, <8 x i32> %I
247245

248246
define <16 x float> @bits_test_vpermilvar_ps_512(<16 x float> %InVec, <16 x i32> %InMask) {
249247
; CHECK-LABEL: @bits_test_vpermilvar_ps_512(
250-
; CHECK-NEXT: [[M:%.*]] = or <16 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4>
251-
; CHECK-NEXT: [[S:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[INVEC:%.*]], <16 x i32> [[M]])
248+
; CHECK-NEXT: [[S:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[INVEC:%.*]], <16 x i32> [[INMASK:%.*]])
252249
; CHECK-NEXT: ret <16 x float> [[S]]
253250
;
254251
%m = or <16 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4>
@@ -258,8 +255,7 @@ define <16 x float> @bits_test_vpermilvar_ps_512(<16 x float> %InVec, <16 x i32>
258255

259256
define <2 x double> @bits_test_vpermilvar_pd(<2 x double> %InVec, <2 x i64> %InMask) {
260257
; CHECK-LABEL: @bits_test_vpermilvar_pd(
261-
; CHECK-NEXT: [[M:%.*]] = or <2 x i64> [[INMASK:%.*]], <i64 0, i64 4294967293>
262-
; CHECK-NEXT: [[S:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[INVEC:%.*]], <2 x i64> [[M]])
258+
; CHECK-NEXT: [[S:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[INVEC:%.*]], <2 x i64> [[INMASK:%.*]])
263259
; CHECK-NEXT: ret <2 x double> [[S]]
264260
;
265261
%m = or <2 x i64> %InMask, <i64 0, i64 4294967293>
@@ -269,8 +265,7 @@ define <2 x double> @bits_test_vpermilvar_pd(<2 x double> %InVec, <2 x i64> %InM
269265

270266
define <4 x double> @bits_test_vpermilvar_pd_256(<4 x double> %InVec, <4 x i64> %InMask) {
271267
; CHECK-LABEL: @bits_test_vpermilvar_pd_256(
272-
; CHECK-NEXT: [[M:%.*]] = or <4 x i64> [[INMASK:%.*]], <i64 0, i64 1, i64 4294967293, i64 -3>
273-
; CHECK-NEXT: [[S:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[INVEC:%.*]], <4 x i64> [[M]])
268+
; CHECK-NEXT: [[S:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[INVEC:%.*]], <4 x i64> [[INMASK:%.*]])
274269
; CHECK-NEXT: ret <4 x double> [[S]]
275270
;
276271
%m = or <4 x i64> %InMask, <i64 0, i64 1, i64 4294967293, i64 -3>
@@ -280,8 +275,7 @@ define <4 x double> @bits_test_vpermilvar_pd_256(<4 x double> %InVec, <4 x i64>
280275

281276
define <8 x double> @bits_test_vpermilvar_pd_512(<8 x double> %InVec, <8 x i64> %InMask) {
282277
; CHECK-LABEL: @bits_test_vpermilvar_pd_512(
283-
; CHECK-NEXT: [[M:%.*]] = or <8 x i64> [[INMASK:%.*]], <i64 0, i64 1, i64 4294967293, i64 -3, i64 0, i64 1, i64 4294967293, i64 -3>
284-
; CHECK-NEXT: [[S:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[INVEC:%.*]], <8 x i64> [[M]])
278+
; CHECK-NEXT: [[S:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[INVEC:%.*]], <8 x i64> [[INMASK:%.*]])
285279
; CHECK-NEXT: ret <8 x double> [[S]]
286280
;
287281
%m = or <8 x i64> %InMask, <i64 0, i64 1, i64 4294967293, i64 -3, i64 0, i64 1, i64 4294967293, i64 -3>

llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -225,8 +225,7 @@ define <8 x double> @undef_test_vpermilvar_pd_512(<8 x double> %v) {
225225

226226
define <4 x float> @bits_test_vpermilvar_ps(<4 x float> %InVec, <4 x i32> %InMask) {
227227
; CHECK-LABEL: @bits_test_vpermilvar_ps(
228-
; CHECK-NEXT: [[M:%.*]] = or <4 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4>
229-
; CHECK-NEXT: [[S:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[INVEC:%.*]], <4 x i32> [[M]])
228+
; CHECK-NEXT: [[S:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[INVEC:%.*]], <4 x i32> [[INMASK:%.*]])
230229
; CHECK-NEXT: ret <4 x float> [[S]]
231230
;
232231
%m = or <4 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4>
@@ -236,8 +235,7 @@ define <4 x float> @bits_test_vpermilvar_ps(<4 x float> %InVec, <4 x i32> %InMas
236235

237236
define <8 x float> @bits_test_vpermilvar_ps_256(<8 x float> %InVec, <8 x i32> %InMask) {
238237
; CHECK-LABEL: @bits_test_vpermilvar_ps_256(
239-
; CHECK-NEXT: [[M:%.*]] = or <8 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4>
240-
; CHECK-NEXT: [[S:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[INVEC:%.*]], <8 x i32> [[M]])
238+
; CHECK-NEXT: [[S:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[INVEC:%.*]], <8 x i32> [[INMASK:%.*]])
241239
; CHECK-NEXT: ret <8 x float> [[S]]
242240
;
243241
%m = or <8 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4>
@@ -247,8 +245,7 @@ define <8 x float> @bits_test_vpermilvar_ps_256(<8 x float> %InVec, <8 x i32> %I
247245

248246
define <16 x float> @bits_test_vpermilvar_ps_512(<16 x float> %InVec, <16 x i32> %InMask) {
249247
; CHECK-LABEL: @bits_test_vpermilvar_ps_512(
250-
; CHECK-NEXT: [[M:%.*]] = or <16 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4>
251-
; CHECK-NEXT: [[S:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[INVEC:%.*]], <16 x i32> [[M]])
248+
; CHECK-NEXT: [[S:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[INVEC:%.*]], <16 x i32> [[INMASK:%.*]])
252249
; CHECK-NEXT: ret <16 x float> [[S]]
253250
;
254251
%m = or <16 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4>
@@ -258,8 +255,7 @@ define <16 x float> @bits_test_vpermilvar_ps_512(<16 x float> %InVec, <16 x i32>
258255

259256
define <2 x double> @bits_test_vpermilvar_pd(<2 x double> %InVec, <2 x i64> %InMask) {
260257
; CHECK-LABEL: @bits_test_vpermilvar_pd(
261-
; CHECK-NEXT: [[M:%.*]] = or <2 x i64> [[INMASK:%.*]], <i64 0, i64 4294967293>
262-
; CHECK-NEXT: [[S:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[INVEC:%.*]], <2 x i64> [[M]])
258+
; CHECK-NEXT: [[S:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[INVEC:%.*]], <2 x i64> [[INMASK:%.*]])
263259
; CHECK-NEXT: ret <2 x double> [[S]]
264260
;
265261
%m = or <2 x i64> %InMask, <i64 0, i64 4294967293>
@@ -269,8 +265,7 @@ define <2 x double> @bits_test_vpermilvar_pd(<2 x double> %InVec, <2 x i64> %InM
269265

270266
define <4 x double> @bits_test_vpermilvar_pd_256(<4 x double> %InVec, <4 x i64> %InMask) {
271267
; CHECK-LABEL: @bits_test_vpermilvar_pd_256(
272-
; CHECK-NEXT: [[M:%.*]] = or <4 x i64> [[INMASK:%.*]], <i64 0, i64 1, i64 4294967293, i64 -3>
273-
; CHECK-NEXT: [[S:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[INVEC:%.*]], <4 x i64> [[M]])
268+
; CHECK-NEXT: [[S:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[INVEC:%.*]], <4 x i64> [[INMASK:%.*]])
274269
; CHECK-NEXT: ret <4 x double> [[S]]
275270
;
276271
%m = or <4 x i64> %InMask, <i64 0, i64 1, i64 4294967293, i64 -3>
@@ -280,8 +275,7 @@ define <4 x double> @bits_test_vpermilvar_pd_256(<4 x double> %InVec, <4 x i64>
280275

281276
define <8 x double> @bits_test_vpermilvar_pd_512(<8 x double> %InVec, <8 x i64> %InMask) {
282277
; CHECK-LABEL: @bits_test_vpermilvar_pd_512(
283-
; CHECK-NEXT: [[M:%.*]] = or <8 x i64> [[INMASK:%.*]], <i64 0, i64 1, i64 4294967293, i64 -3, i64 0, i64 1, i64 4294967293, i64 -3>
284-
; CHECK-NEXT: [[S:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[INVEC:%.*]], <8 x i64> [[M]])
278+
; CHECK-NEXT: [[S:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[INVEC:%.*]], <8 x i64> [[INMASK:%.*]])
285279
; CHECK-NEXT: ret <8 x double> [[S]]
286280
;
287281
%m = or <8 x i64> %InMask, <i64 0, i64 1, i64 4294967293, i64 -3, i64 0, i64 1, i64 4294967293, i64 -3>

0 commit comments

Comments
 (0)