diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 52ec4753ec4c1..ef5d833c03428 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10788,7 +10788,10 @@ SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op, if (VT == MVT::i32) AddV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, AddV, DAG.getConstant(0, DL, MVT::i64)); - AddV = DAG.getNode(ISD::BITCAST, DL, VT, AddV); + else + AddV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, + DAG.getNode(AArch64ISD::NVCAST, DL, MVT::v1i64, AddV), + DAG.getConstant(0, DL, MVT::i64)); if (IsParity) AddV = DAG.getNode(ISD::AND, DL, VT, AddV, DAG.getConstant(1, DL, VT)); return AddV; @@ -10797,7 +10800,10 @@ SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op, SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v16i8, Val); SDValue AddV = DAG.getNode(AArch64ISD::UADDV, DL, MVT::v16i8, CtPop); - AddV = DAG.getNode(ISD::BITCAST, DL, VT, AddV); + AddV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, + DAG.getNode(AArch64ISD::NVCAST, DL, MVT::v2i64, AddV), + DAG.getConstant(0, DL, MVT::i64)); + AddV = DAG.getZExtOrTrunc(AddV, DL, VT); if (IsParity) AddV = DAG.getNode(ISD::AND, DL, VT, AddV, DAG.getConstant(1, DL, VT)); return AddV; diff --git a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll index 369667ec33f66..d06e42f5405ef 100644 --- a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll @@ -129,7 +129,6 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone { ; CHECK-BE-NEXT: rev64 v0.8b, v0.8b ; CHECK-BE-NEXT: cnt v0.8b, v0.8b ; CHECK-BE-NEXT: addv b0, v0.8b -; CHECK-BE-NEXT: rev64 v0.8b, v0.8b ; CHECK-BE-NEXT: fmov x0, d0 ; CHECK-BE-NEXT: ret %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) @@ -436,9 +435,9 @@ define i128 @cnt128(i128 %x) nounwind readnone { ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: mov.d v0[1], x1 +; CHECK-NEXT: mov x1, xzr ; CHECK-NEXT: cnt.16b v0, v0 ; CHECK-NEXT: addv.16b b0, v0 -; CHECK-NEXT: mov.d x1, v0[1] ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret ; @@ -481,13 +480,12 @@ define i128 @cnt128(i128 %x) nounwind readnone { ; CHECK-BE-LABEL: cnt128: ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: fmov d0, x0 +; CHECK-BE-NEXT: mov x0, xzr ; CHECK-BE-NEXT: mov v0.d[1], x1 ; CHECK-BE-NEXT: rev64 v0.16b, v0.16b ; CHECK-BE-NEXT: cnt v0.16b, v0.16b ; CHECK-BE-NEXT: addv b0, v0.16b -; CHECK-BE-NEXT: rev64 v0.16b, v0.16b -; CHECK-BE-NEXT: mov x1, v0.d[1] -; CHECK-BE-NEXT: fmov x0, d0 +; CHECK-BE-NEXT: fmov x1, d0 ; CHECK-BE-NEXT: ret %cnt = tail call i128 @llvm.ctpop.i128(i128 %x) ret i128 %cnt diff --git a/llvm/test/CodeGen/AArch64/parity.ll b/llvm/test/CodeGen/AArch64/parity.ll index 1e51793fb5f91..91515277cb3f6 100644 --- a/llvm/test/CodeGen/AArch64/parity.ll +++ b/llvm/test/CodeGen/AArch64/parity.ll @@ -159,7 +159,7 @@ define i32 @parity_64_trunc(i64 %x) { ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: cnt v0.8b, v0.8b ; CHECK-NEXT: addv b0, v0.8b -; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/popcount.ll b/llvm/test/CodeGen/AArch64/popcount.ll index 6cc925f0ae91f..e664e73594923 100644 --- a/llvm/test/CodeGen/AArch64/popcount.ll +++ b/llvm/test/CodeGen/AArch64/popcount.ll @@ -41,8 +41,8 @@ define i8 @popcount128(ptr nocapture nonnull readonly %0) { ; BE-NEXT: rev64 v0.16b, v0.16b ; BE-NEXT: cnt v0.16b, v0.16b ; BE-NEXT: addv b0, v0.16b -; BE-NEXT: rev32 v0.16b, v0.16b -; BE-NEXT: mov w0, v0.s[3] +; BE-NEXT: rev64 v0.4s, v0.4s +; BE-NEXT: mov w0, v0.s[1] ; BE-NEXT: ret ; ; GISEL-LABEL: popcount128: @@ -138,10 +138,10 @@ define i16 @popcount256(ptr nocapture nonnull readonly %0) { ; BE-NEXT: cnt v1.16b, v1.16b ; BE-NEXT: addv b0, v0.16b ; BE-NEXT: addv b1, v1.16b -; BE-NEXT: rev32 v0.16b, v0.16b -; BE-NEXT: rev32 v1.16b, v1.16b -; BE-NEXT: mov w8, v0.s[3] -; BE-NEXT: mov w9, v1.s[3] +; BE-NEXT: rev64 v0.4s, v0.4s +; BE-NEXT: rev64 v1.4s, v1.4s +; BE-NEXT: mov w8, v0.s[1] +; BE-NEXT: mov w9, v1.s[1] ; BE-NEXT: add w0, w9, w8 ; BE-NEXT: ret ; @@ -227,22 +227,21 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) { ; CHECK: // %bb.0: // %Entry ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: mov x1, xzr ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: addv b0, v0.16b -; CHECK-NEXT: mov x1, v0.d[1] ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret ; ; BE-LABEL: popcount1x128: ; BE: // %bb.0: // %Entry ; BE-NEXT: fmov d0, x0 +; BE-NEXT: mov x0, xzr ; BE-NEXT: mov v0.d[1], x1 ; BE-NEXT: rev64 v0.16b, v0.16b ; BE-NEXT: cnt v0.16b, v0.16b ; BE-NEXT: addv b0, v0.16b -; BE-NEXT: rev64 v0.16b, v0.16b -; BE-NEXT: mov x1, v0.d[1] -; BE-NEXT: fmov x0, d0 +; BE-NEXT: fmov x1, d0 ; BE-NEXT: ret ; ; GISEL-LABEL: popcount1x128: