diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b87e3121838dc..00a49f3842844 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -36995,6 +36995,18 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, } break; } + case ISD::INTRINSIC_WO_CHAIN: { + switch (Op->getConstantOperandVal(0)) { + case Intrinsic::x86_sse2_psad_bw: + case Intrinsic::x86_avx2_psad_bw: + case Intrinsic::x86_avx512_psad_bw_512: + // PSADBW - fills low 16 bits and zeros upper 48 bits of each i64 result. + assert(VT.getScalarType() == MVT::i64 && "Unexpected PSADBW types"); + Known.Zero.setBitsFrom(16); + break; + } + break; + } } // Handle target shuffles. diff --git a/llvm/test/CodeGen/X86/psadbw.ll b/llvm/test/CodeGen/X86/psadbw.ll index 8141b22d321f4..daabb4d9ca408 100644 --- a/llvm/test/CodeGen/X86/psadbw.ll +++ b/llvm/test/CodeGen/X86/psadbw.ll @@ -54,7 +54,6 @@ define <2 x i64> @combine_psadbw_cmp_knownbits(<16 x i8> %a0) nounwind { ; X86-SSE-NEXT: pxor %xmm1, %xmm1 ; X86-SSE-NEXT: psadbw %xmm0, %xmm1 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] -; X86-SSE-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; X86-SSE-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; X86-SSE-NEXT: retl ; @@ -64,7 +63,6 @@ define <2 x i64> @combine_psadbw_cmp_knownbits(<16 x i8> %a0) nounwind { ; X64-SSE-NEXT: pxor %xmm1, %xmm1 ; X64-SSE-NEXT: psadbw %xmm0, %xmm1 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] -; X64-SSE-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; X64-SSE-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; X64-SSE-NEXT: retq ; @@ -82,28 +80,15 @@ define <2 x i64> @combine_psadbw_cmp_knownbits(<16 x i8> %a0) nounwind { ret <2 x i64> %ext } -; TODO: No need to scalarize the sitofp as the PSADBW results are smaller than i32. +; No need to scalarize the sitofp as the PSADBW results are smaller than i32. define <2 x double> @combine_psadbw_sitofp_knownbits(<16 x i8> %a0) nounwind { ; X86-SSE-LABEL: combine_psadbw_sitofp_knownbits: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %ebp -; X86-SSE-NEXT: movl %esp, %ebp -; X86-SSE-NEXT: andl $-8, %esp -; X86-SSE-NEXT: subl $32, %esp ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; X86-SSE-NEXT: pxor %xmm1, %xmm1 ; X86-SSE-NEXT: psadbw %xmm0, %xmm1 -; X86-SSE-NEXT: movq %xmm1, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] -; X86-SSE-NEXT: movq %xmm0, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp) -; X86-SSE-NEXT: fstpl {{[0-9]+}}(%esp) -; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp) -; X86-SSE-NEXT: fstpl (%esp) -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] -; X86-SSE-NEXT: movl %ebp, %esp -; X86-SSE-NEXT: popl %ebp +; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; X86-SSE-NEXT: cvtdq2pd %xmm0, %xmm0 ; X86-SSE-NEXT: retl ; ; X64-SSE-LABEL: combine_psadbw_sitofp_knownbits: @@ -111,14 +96,8 @@ define <2 x double> @combine_psadbw_sitofp_knownbits(<16 x i8> %a0) nounwind { ; X64-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; X64-SSE-NEXT: pxor %xmm1, %xmm1 ; X64-SSE-NEXT: psadbw %xmm0, %xmm1 -; X64-SSE-NEXT: movd %xmm1, %eax -; X64-SSE-NEXT: xorps %xmm0, %xmm0 -; X64-SSE-NEXT: cvtsi2sd %eax, %xmm0 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X64-SSE-NEXT: movd %xmm1, %eax -; X64-SSE-NEXT: xorps %xmm1, %xmm1 -; X64-SSE-NEXT: cvtsi2sd %eax, %xmm1 -; X64-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; X64-SSE-NEXT: cvtdq2pd %xmm0, %xmm0 ; X64-SSE-NEXT: retq ; ; AVX2-LABEL: combine_psadbw_sitofp_knownbits: @@ -126,10 +105,8 @@ define <2 x double> @combine_psadbw_sitofp_knownbits(<16 x i8> %a0) nounwind { ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vcvtdq2pd %xmm0, %xmm1 -; AVX2-NEXT: vpextrq $1, %xmm0, %rax -; AVX2-NEXT: vcvtsi2sd %eax, %xmm2, %xmm0 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: vcvtdq2pd %xmm0, %xmm0 ; AVX2-NEXT: retq %mask = and <16 x i8> %a0, %sad = tail call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %mask, <16 x i8> zeroinitializer) @@ -137,28 +114,24 @@ define <2 x double> @combine_psadbw_sitofp_knownbits(<16 x i8> %a0) nounwind { ret <2 x double> %cvt } -; TODO: Convert from uitofp to sitofp as the PSADBW results are zero-extended. +; Convert from uitofp to sitofp as the PSADBW results are zero-extended. define <2 x double> @combine_psadbw_uitofp_knownbits(<16 x i8> %a0) nounwind { ; X86-SSE-LABEL: combine_psadbw_uitofp_knownbits: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; X86-SSE-NEXT: pxor %xmm1, %xmm1 -; X86-SSE-NEXT: psadbw %xmm1, %xmm0 -; X86-SSE-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE-NEXT: movapd {{.*#+}} xmm1 = [0,1160773632,0,1160773632] -; X86-SSE-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE-NEXT: addpd %xmm1, %xmm0 +; X86-SSE-NEXT: psadbw %xmm0, %xmm1 +; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; X86-SSE-NEXT: cvtdq2pd %xmm0, %xmm0 ; X86-SSE-NEXT: retl ; ; X64-SSE-LABEL: combine_psadbw_uitofp_knownbits: ; X64-SSE: # %bb.0: ; X64-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; X64-SSE-NEXT: pxor %xmm1, %xmm1 -; X64-SSE-NEXT: psadbw %xmm1, %xmm0 -; X64-SSE-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE-NEXT: movapd {{.*#+}} xmm1 = [4985484787499139072,4985484787499139072] -; X64-SSE-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE-NEXT: addpd %xmm1, %xmm0 +; X64-SSE-NEXT: psadbw %xmm0, %xmm1 +; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; X64-SSE-NEXT: cvtdq2pd %xmm0, %xmm0 ; X64-SSE-NEXT: retq ; ; AVX2-LABEL: combine_psadbw_uitofp_knownbits: @@ -166,12 +139,8 @@ define <2 x double> @combine_psadbw_uitofp_knownbits(<16 x i8> %a0) nounwind { ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX2-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = [4985484787499139072,4985484787499139072] -; AVX2-NEXT: # xmm1 = mem[0,0] -; AVX2-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: vcvtdq2pd %xmm0, %xmm0 ; AVX2-NEXT: retq %mask = and <16 x i8> %a0, %sad = tail call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %mask, <16 x i8> zeroinitializer) diff --git a/llvm/test/CodeGen/X86/sad.ll b/llvm/test/CodeGen/X86/sad.ll index 2a33e75a8357c..ca319687da54d 100644 --- a/llvm/test/CodeGen/X86/sad.ll +++ b/llvm/test/CodeGen/X86/sad.ll @@ -989,9 +989,7 @@ define dso_local i32 @sad_unroll_nonzero_initial(ptr %arg, ptr %arg1, ptr %arg2, ; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] ; SSE2-NEXT: paddd %xmm2, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; SSE2-NEXT: paddd %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: retq ; ; AVX-LABEL: sad_unroll_nonzero_initial: @@ -1053,9 +1051,7 @@ define dso_local i32 @sad_double_reduction(ptr %arg, ptr %arg1, ptr %arg2, ptr % ; SSE2-NEXT: paddd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] ; SSE2-NEXT: paddd %xmm2, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: retq ; ; AVX-LABEL: sad_double_reduction: @@ -1067,8 +1063,6 @@ define dso_local i32 @sad_double_reduction(ptr %arg, ptr %arg1, ptr %arg2, ptr % ; AVX-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: retq bb: @@ -1115,9 +1109,7 @@ define dso_local i32 @sad_double_reduction_abs(ptr %arg, ptr %arg1, ptr %arg2, p ; SSE2-NEXT: paddd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] ; SSE2-NEXT: paddd %xmm2, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: retq ; ; AVX-LABEL: sad_double_reduction_abs: @@ -1129,8 +1121,6 @@ define dso_local i32 @sad_double_reduction_abs(ptr %arg, ptr %arg1, ptr %arg2, p ; AVX-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: retq bb: