From bd1d6b15c5a201feaa5a6452c5ffd86999adc665 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Sat, 5 Apr 2025 03:45:41 +0200 Subject: [PATCH] [X86][SSE] Don't emit SSE2 load instructions in SSE1-only mode --- .../Target/X86/X86FixupVectorConstants.cpp | 11 ++++++---- llvm/test/CodeGen/X86/pr134607.ll | 20 +++++++++++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/X86/pr134607.ll diff --git a/llvm/lib/Target/X86/X86FixupVectorConstants.cpp b/llvm/lib/Target/X86/X86FixupVectorConstants.cpp index 40024baf93fdb..2c870d1171658 100644 --- a/llvm/lib/Target/X86/X86FixupVectorConstants.cpp +++ b/llvm/lib/Target/X86/X86FixupVectorConstants.cpp @@ -333,6 +333,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF, MachineInstr &MI) { unsigned Opc = MI.getOpcode(); MachineConstantPool *CP = MI.getParent()->getParent()->getConstantPool(); + bool HasSSE2 = ST->hasSSE2(); bool HasSSE41 = ST->hasSSE41(); bool HasAVX2 = ST->hasAVX2(); bool HasDQI = ST->hasDQI(); @@ -394,11 +395,13 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF, case X86::MOVAPDrm: case X86::MOVAPSrm: case X86::MOVUPDrm: - case X86::MOVUPSrm: + case X86::MOVUPSrm: { // TODO: SSE3 MOVDDUP Handling - return FixupConstant({{X86::MOVSSrm, 1, 32, rebuildZeroUpperCst}, - {X86::MOVSDrm, 1, 64, rebuildZeroUpperCst}}, - 128, 1); + FixupEntry Fixups[] = { + {X86::MOVSSrm, 1, 32, rebuildZeroUpperCst}, + {HasSSE2 ? 
X86::MOVSDrm : 0, 1, 64, rebuildZeroUpperCst}}; + return FixupConstant(Fixups, 128, 1); + } case X86::VMOVAPDrm: case X86::VMOVAPSrm: case X86::VMOVUPDrm: diff --git a/llvm/test/CodeGen/X86/pr134607.ll b/llvm/test/CodeGen/X86/pr134607.ll new file mode 100644 index 0000000000000..5e824c22e5a22 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr134607.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse -O3 | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2,+sse -O3 | FileCheck %s --check-prefixes=X64-SSE1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2,+sse -O3 | FileCheck %s --check-prefixes=X64-SSE2 + +define void @store_v2f32_constant(ptr %v) { +; X86-LABEL: store_v2f32_constant: +; X86: # %bb.0: +; X86-NEXT: movl 4(%esp), %eax +; X86-NEXT: movaps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 + +; X64-SSE1-LABEL: store_v2f32_constant: +; X64-SSE1: # %bb.0: +; X64-SSE1-NEXT: movaps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 + +; X64-SSE2-LABEL: store_v2f32_constant: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: movsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 + store <2 x float> <float 2.560000e+02, float 5.120000e+02>, ptr %v, align 4 + ret void +}