diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index b6e018ba0e454..b15f70cbec1cd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -965,6 +965,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 42763aab5bb55..af48a5e216803 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1161,6 +1161,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_STEP_VECTOR(N, Lo, Hi); break; case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; + case ISD::ATOMIC_LOAD: + SplitVecRes_ATOMIC_LOAD(cast(N), Lo, Hi); + break; case ISD::LOAD: SplitVecRes_LOAD(cast(N), Lo, Hi); break; @@ -1414,6 +1417,40 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SetSplitVector(SDValue(N, ResNo), Lo, Hi); } +void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, + SDValue &Hi) { + assert(LD->getExtensionType() == ISD::NON_EXTLOAD && + "Extended load during type legalization!"); + SDLoc dl(LD); + EVT VT = LD->getValueType(0); + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); + + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + EVT MemIntVT = + EVT::getIntegerVT(*DAG.getContext(), LD->getMemoryVT().getSizeInBits()); + SDValue ALD = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, MemIntVT, IntVT, Ch, + Ptr, LD->getMemOperand()); + + EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); + EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); + SDValue ExtractLo = DAG.getNode(ISD::TRUNCATE, dl, LoIntVT, ALD); + SDValue ExtractHi = + DAG.getNode(ISD::SRL, dl, IntVT, ALD, + DAG.getIntPtrConstant(VT.getSizeInBits() / 2, dl)); + ExtractHi = DAG.getNode(ISD::TRUNCATE, dl, HiIntVT, ExtractHi); + + Lo = DAG.getBitcast(LoVT, ExtractLo); + Hi = DAG.getBitcast(HiVT, ExtractHi); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(LD, 1), ALD.getValue(1)); +} + void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI, SDValue &Ptr, uint64_t *ScaledOffset) { diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 535a87316e162..039edcbf83544 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -374,6 +374,74 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) { ret <2 x float> %ret } +define <2 x half> @atomic_vec2_half(ptr %x) { +; CHECK-O3-LABEL: atomic_vec2_half: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec2_half: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec2_half: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec2_half: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec2_half: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec2_half: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <2 x half>, ptr %x acquire, align 4 + ret <2 x half> %ret +} + +define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) { +; CHECK-O3-LABEL: atomic_vec2_bfloat: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec2_bfloat: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec2_bfloat: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec2_bfloat: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec2_bfloat: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec2_bfloat: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <2 x bfloat>, ptr %x acquire, align 4 + ret <2 x bfloat> %ret +} + define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { ; CHECK-O3-LABEL: atomic_vec1_ptr: ; CHECK-O3: # %bb.0: @@ -835,6 +903,24 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind { ret <4 x i16> %ret } +define <4 x half> @atomic_vec4_half(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_half: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %xmm0 +; CHECK-NEXT: retq + %ret = load atomic <4 x half>, ptr %x acquire, align 8 + ret <4 x half> %ret +} + +define <4 x bfloat> @atomic_vec4_bfloat(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_bfloat: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %xmm0 +; CHECK-NEXT: retq + %ret = load atomic <4 x bfloat>, ptr %x acquire, align 8 + ret <4 x bfloat> %ret +} + define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind { ; CHECK-O3-LABEL: atomic_vec4_float_align: ; CHECK-O3: # %bb.0: