diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index ddb8a0ee8179f..523d3aea66225 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6551,17 +6551,17 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
 
   ElementCount NumElts = VT.getVectorElementCount();
 
-  // See if we can fold through bitcasted integer ops.
+  // See if we can fold through any bitcasted integer ops.
   if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() &&
       Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&
-      Ops[0].getOpcode() == ISD::BITCAST &&
-      Ops[1].getOpcode() == ISD::BITCAST) {
+      (Ops[0].getOpcode() == ISD::BITCAST ||
+       Ops[1].getOpcode() == ISD::BITCAST)) {
     SDValue N1 = peekThroughBitcasts(Ops[0]);
     SDValue N2 = peekThroughBitcasts(Ops[1]);
     auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
     auto *BV2 = dyn_cast<BuildVectorSDNode>(N2);
-    EVT BVVT = N1.getValueType();
-    if (BV1 && BV2 && BVVT.isInteger() && BVVT == N2.getValueType()) {
+    if (BV1 && BV2 && N1.getValueType().isInteger() &&
+        N2.getValueType().isInteger()) {
       bool IsLE = getDataLayout().isLittleEndian();
       unsigned EltBits = VT.getScalarSizeInBits();
       SmallVector<APInt> RawBits1, RawBits2;
@@ -6577,15 +6577,22 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
           RawBits.push_back(*Fold);
         }
         if (RawBits.size() == NumElts.getFixedValue()) {
-          // We have constant folded, but we need to cast this again back to
-          // the original (possibly legalized) type.
+          // We have constant folded, but we might need to cast this again back
+          // to the original (possibly legalized) type.
+          EVT BVVT, BVEltVT;
+          if (N1.getValueType() == VT) {
+            BVVT = N1.getValueType();
+            BVEltVT = BV1->getOperand(0).getValueType();
+          } else {
+            BVVT = N2.getValueType();
+            BVEltVT = BV2->getOperand(0).getValueType();
+          }
+          unsigned BVEltBits = BVEltVT.getSizeInBits();
           SmallVector<APInt> DstBits;
           BitVector DstUndefs;
           BuildVectorSDNode::recastRawBits(IsLE, BVVT.getScalarSizeInBits(),
                                            DstBits, RawBits, DstUndefs,
                                            BitVector(RawBits.size(), false));
-          EVT BVEltVT = BV1->getOperand(0).getValueType();
-          unsigned BVEltBits = BVEltVT.getSizeInBits();
           SmallVector<SDValue> Ops(DstBits.size(), getUNDEF(BVEltVT));
           for (unsigned I = 0, E = DstBits.size(); I != E; ++I) {
             if (DstUndefs[I])
diff --git a/llvm/test/CodeGen/ARM/vector-store.ll b/llvm/test/CodeGen/ARM/vector-store.ll
index 2f27786a0bf93..a0a801d2b6d2c 100644
--- a/llvm/test/CodeGen/ARM/vector-store.ll
+++ b/llvm/test/CodeGen/ARM/vector-store.ll
@@ -617,17 +617,14 @@ define void @v3i8store(ptr %p) {
 ; CHECK-LE:        @ %bb.0:
 ; CHECK-LE-NEXT:    .pad #4
 ; CHECK-LE-NEXT:    sub sp, #4
-; CHECK-LE-NEXT:    vmov.i32 d16, #0xff
-; CHECK-LE-NEXT:    mov r1, sp
-; CHECK-LE-NEXT:    vmov.i32 d17, #0x0
-; CHECK-LE-NEXT:    movs r2, #0
-; CHECK-LE-NEXT:    vand d16, d17, d16
-; CHECK-LE-NEXT:    vst1.32 {d16[0]}, [r1:32]
-; CHECK-LE-NEXT:    vld1.32 {d16[0]}, [r1:32]
+; CHECK-LE-NEXT:    movs r1, #0
+; CHECK-LE-NEXT:    mov r2, sp
+; CHECK-LE-NEXT:    str r1, [sp]
+; CHECK-LE-NEXT:    vld1.32 {d16[0]}, [r2:32]
+; CHECK-LE-NEXT:    strb r1, [r0, #2]
 ; CHECK-LE-NEXT:    vmovl.u16 q8, d16
-; CHECK-LE-NEXT:    strb r2, [r0, #2]
-; CHECK-LE-NEXT:    vmov.32 r1, d16[0]
-; CHECK-LE-NEXT:    strh r1, [r0]
+; CHECK-LE-NEXT:    vmov.32 r2, d16[0]
+; CHECK-LE-NEXT:    strh r2, [r0]
 ; CHECK-LE-NEXT:    add sp, #4
 ; CHECK-LE-NEXT:    bx lr
 ;
diff --git a/llvm/test/CodeGen/X86/vshift-6.ll b/llvm/test/CodeGen/X86/vshift-6.ll
index 21c8e8d3ee5d2..912ff750d9e91 100644
--- a/llvm/test/CodeGen/X86/vshift-6.ll
+++ b/llvm/test/CodeGen/X86/vshift-6.ll
@@ -32,9 +32,9 @@ define <16 x i8> @do_not_crash(ptr, ptr, ptr, i32, i64, i8) {
 ; X86-NEXT:    movb %al, (%ecx)
 ; X86-NEXT:    movd %eax, %xmm1
 ; X86-NEXT:    psllq $56, %xmm1
-; X86-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
 ; X86-NEXT:    pcmpeqd %xmm3, %xmm3
 ; X86-NEXT:    psllw $5, %xmm1
+; X86-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
 ; X86-NEXT:    pxor %xmm2, %xmm2
 ; X86-NEXT:    pxor %xmm0, %xmm0
 ; X86-NEXT:    pcmpgtb %xmm1, %xmm0
@@ -64,9 +64,9 @@ define <16 x i8> @do_not_crash(ptr, ptr, ptr, i32, i64, i8) {
 ; X64-NEXT:    movb %r9b, (%rdi)
 ; X64-NEXT:    movd %r9d, %xmm1
 ; X64-NEXT:    psllq $56, %xmm1
-; X64-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
 ; X64-NEXT:    pcmpeqd %xmm2, %xmm2
 ; X64-NEXT:    psllw $5, %xmm1
+; X64-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
 ; X64-NEXT:    pxor %xmm3, %xmm3
 ; X64-NEXT:    pxor %xmm0, %xmm0
 ; X64-NEXT:    pcmpgtb %xmm1, %xmm0
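For reference, the step this fold depends on is: fold the constants lane-by-lane at VT's element width, then re-slice the folded raw bits back to the build vector's original element width before rebuilding and bitcasting to VT. Below is a minimal standalone C++ sketch of that re-slicing idea, assuming little-endian lane order and lanes of at most 64 bits; the helper name resliceLanes is illustrative only and is not LLVM's BuildVectorSDNode::recastRawBits API.

    #include <cassert>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Re-slice a little-endian constant vector from SrcBits-wide lanes into
    // DstBits-wide lanes (e.g. 2 x i32 -> 8 x i8). Sketch only: lanes are
    // plain uint64_t, so lane widths must be <= 64 bits.
    static std::vector<uint64_t>
    resliceLanes(const std::vector<uint64_t> &SrcLanes, unsigned SrcBits,
                 unsigned DstBits) {
      size_t TotalBits = SrcBits * SrcLanes.size();
      assert(TotalBits % DstBits == 0 && "lane widths must tile the vector");
      // Flatten to one little-endian bit string: bit I of lane L is overall
      // bit L * SrcBits + I.
      std::vector<bool> Bits(TotalBits);
      for (size_t L = 0; L != SrcLanes.size(); ++L)
        for (unsigned I = 0; I != SrcBits; ++I)
          Bits[L * SrcBits + I] = (SrcLanes[L] >> I) & 1;
      // Regroup the same bit string into DstBits-wide lanes.
      std::vector<uint64_t> DstLanes(TotalBits / DstBits, 0);
      for (size_t L = 0; L != DstLanes.size(); ++L)
        for (unsigned I = 0; I != DstBits; ++I)
          DstLanes[L] |= uint64_t(Bits[L * DstBits + I]) << I;
      return DstLanes;
    }

    int main() {
      // Mimic: and (v2i32 bitcast (v8i8 build_vector)), (v2i32 build_vector).
      // Fold AND lane-by-lane at the i32 width, then recast the folded raw
      // bits to i8 lanes so the result can be rebuilt in the bitcast source
      // type and bitcast back to v2i32, as in the patch.
      std::vector<uint64_t> A = {0x00FF00FF, 0xFFFFFFFF};
      std::vector<uint64_t> B = {0x12345678, 0x9ABCDEF0};
      std::vector<uint64_t> Folded(A.size());
      for (size_t I = 0; I != A.size(); ++I)
        Folded[I] = A[I] & B[I];
      for (uint64_t Byte : resliceLanes(Folded, 32, 8))
        std::printf("0x%02llx ", (unsigned long long)Byte);
      std::printf("\n"); // 0x78 0x00 0x34 0x00 0xf0 0xde 0xbc 0x9a
    }

In the patch itself this happens on APInt raw bits via BuildVectorSDNode::recastRawBits, which additionally tracks undef lanes (the DstUndefs BitVector); the new EVT selection above just picks which operand's pre-bitcast build vector supplies the destination lane type now that only one operand is required to be a bitcast.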