Skip to content

Commit 397bcfe

Browse files
committed
[X86] Fold extract_subvector(int_to_fp(x)) vXi32/vXf32 cases to match existing fp_to_int folds
1 parent 8173ad7 commit 397bcfe

File tree

3 files changed: 27 additions and 109 deletions.

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57203,11 +57203,13 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
5720357203
}
5720457204
}
5720557205
// v4i32 CVTPS2DQ(v4f32) / CVTPS2UDQ(v4f32).
57206-
if ((InOpcode == ISD::FP_TO_SINT ||
57207-
(InOpcode == ISD::FP_TO_UINT && Subtarget.hasVLX())) &&
57208-
VT == MVT::v4i32) {
57206+
// v4f32 CVTDQ2PS(v4i32) / CVTUDQ2PS(v4i32).
57207+
if ((InOpcode == ISD::FP_TO_SINT || InOpcode == ISD::SINT_TO_FP ||
57208+
((InOpcode == ISD::FP_TO_UINT || InOpcode == ISD::UINT_TO_FP) &&
57209+
Subtarget.hasVLX())) &&
57210+
(VT == MVT::v4i32 || VT == MVT::v4f32)) {
5720957211
SDValue Src = InVec.getOperand(0);
57210-
if (Src.getValueType().getScalarType() == MVT::f32)
57212+
if (Src.getValueType().getScalarSizeInBits() == 32)
5721157213
return DAG.getNode(InOpcode, DL, VT,
5721257214
extractSubVector(Src, IdxVal, DAG, DL, SizeInBits));
5721357215
}

llvm/test/CodeGen/X86/pr50609.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ define void @PR50609(ptr noalias nocapture %RET, ptr noalias %aFOO, <16 x i32> %
1212
; CHECK-NEXT: vpsrld $30, %xmm3, %xmm3
1313
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm2
1414
; CHECK-NEXT: vpsrad $2, %xmm2, %xmm2
15-
; CHECK-NEXT: vcvtdq2ps %ymm2, %ymm2
15+
; CHECK-NEXT: vcvtdq2ps %xmm2, %xmm2
1616
; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,0,0,0]
1717
; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2
1818
; CHECK-NEXT: vmaskmovps %ymm2, %ymm0, (%rdi)

llvm/test/CodeGen/X86/vec_int_to_fp.ll

Lines changed: 20 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -1478,32 +1478,11 @@ define <4 x float> @sitofp_8i16_to_4f32(<8 x i16> %a) {
14781478
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
14791479
; SSE41-NEXT: retq
14801480
;
1481-
; AVX1-LABEL: sitofp_8i16_to_4f32:
1482-
; AVX1: # %bb.0:
1483-
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
1484-
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1485-
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
1486-
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1487-
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
1488-
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1489-
; AVX1-NEXT: vzeroupper
1490-
; AVX1-NEXT: retq
1491-
;
1492-
; AVX2-LABEL: sitofp_8i16_to_4f32:
1493-
; AVX2: # %bb.0:
1494-
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
1495-
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
1496-
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1497-
; AVX2-NEXT: vzeroupper
1498-
; AVX2-NEXT: retq
1499-
;
1500-
; AVX512-LABEL: sitofp_8i16_to_4f32:
1501-
; AVX512: # %bb.0:
1502-
; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
1503-
; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
1504-
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1505-
; AVX512-NEXT: vzeroupper
1506-
; AVX512-NEXT: retq
1481+
; AVX-LABEL: sitofp_8i16_to_4f32:
1482+
; AVX: # %bb.0:
1483+
; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
1484+
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
1485+
; AVX-NEXT: retq
15071486
%cvt = sitofp <8 x i16> %a to <8 x float>
15081487
%shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15091488
ret <4 x float> %shuf
@@ -1549,32 +1528,11 @@ define <4 x float> @sitofp_16i8_to_4f32(<16 x i8> %a) {
15491528
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
15501529
; SSE41-NEXT: retq
15511530
;
1552-
; AVX1-LABEL: sitofp_16i8_to_4f32:
1553-
; AVX1: # %bb.0:
1554-
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1
1555-
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1556-
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
1557-
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1558-
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
1559-
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1560-
; AVX1-NEXT: vzeroupper
1561-
; AVX1-NEXT: retq
1562-
;
1563-
; AVX2-LABEL: sitofp_16i8_to_4f32:
1564-
; AVX2: # %bb.0:
1565-
; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
1566-
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
1567-
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1568-
; AVX2-NEXT: vzeroupper
1569-
; AVX2-NEXT: retq
1570-
;
1571-
; AVX512-LABEL: sitofp_16i8_to_4f32:
1572-
; AVX512: # %bb.0:
1573-
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
1574-
; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0
1575-
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1576-
; AVX512-NEXT: vzeroupper
1577-
; AVX512-NEXT: retq
1531+
; AVX-LABEL: sitofp_16i8_to_4f32:
1532+
; AVX: # %bb.0:
1533+
; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
1534+
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
1535+
; AVX-NEXT: retq
15781536
%cvt = sitofp <16 x i8> %a to <16 x float>
15791537
%shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15801538
ret <4 x float> %shuf
@@ -2354,32 +2312,11 @@ define <4 x float> @uitofp_8i16_to_4f32(<8 x i16> %a) {
23542312
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
23552313
; SSE41-NEXT: retq
23562314
;
2357-
; AVX1-LABEL: uitofp_8i16_to_4f32:
2358-
; AVX1: # %bb.0:
2359-
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
2360-
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2361-
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2362-
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2363-
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
2364-
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2365-
; AVX1-NEXT: vzeroupper
2366-
; AVX1-NEXT: retq
2367-
;
2368-
; AVX2-LABEL: uitofp_8i16_to_4f32:
2369-
; AVX2: # %bb.0:
2370-
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2371-
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
2372-
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2373-
; AVX2-NEXT: vzeroupper
2374-
; AVX2-NEXT: retq
2375-
;
2376-
; AVX512-LABEL: uitofp_8i16_to_4f32:
2377-
; AVX512: # %bb.0:
2378-
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2379-
; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
2380-
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2381-
; AVX512-NEXT: vzeroupper
2382-
; AVX512-NEXT: retq
2315+
; AVX-LABEL: uitofp_8i16_to_4f32:
2316+
; AVX: # %bb.0:
2317+
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2318+
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
2319+
; AVX-NEXT: retq
23832320
%cvt = uitofp <8 x i16> %a to <8 x float>
23842321
%shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
23852322
ret <4 x float> %shuf
@@ -2425,32 +2362,11 @@ define <4 x float> @uitofp_16i8_to_4f32(<16 x i8> %a) {
24252362
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
24262363
; SSE41-NEXT: retq
24272364
;
2428-
; AVX1-LABEL: uitofp_16i8_to_4f32:
2429-
; AVX1: # %bb.0:
2430-
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2431-
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
2432-
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2433-
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2434-
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
2435-
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2436-
; AVX1-NEXT: vzeroupper
2437-
; AVX1-NEXT: retq
2438-
;
2439-
; AVX2-LABEL: uitofp_16i8_to_4f32:
2440-
; AVX2: # %bb.0:
2441-
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2442-
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
2443-
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2444-
; AVX2-NEXT: vzeroupper
2445-
; AVX2-NEXT: retq
2446-
;
2447-
; AVX512-LABEL: uitofp_16i8_to_4f32:
2448-
; AVX512: # %bb.0:
2449-
; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2450-
; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0
2451-
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2452-
; AVX512-NEXT: vzeroupper
2453-
; AVX512-NEXT: retq
2365+
; AVX-LABEL: uitofp_16i8_to_4f32:
2366+
; AVX: # %bb.0:
2367+
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2368+
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
2369+
; AVX-NEXT: retq
24542370
%cvt = uitofp <16 x i8> %a to <16 x float>
24552371
%shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
24562372
ret <4 x float> %shuf

Comments (0): this commit has no comments.