From a5c930485df1bf9150979d8a41c603bcdca44ac0 Mon Sep 17 00:00:00 2001 From: Yeting Kuo Date: Mon, 4 Mar 2024 11:15:21 +0800 Subject: [PATCH 1/2] [DAG] Teach SelectionDAGBuilder to read parameter alignment of compressstore/expandload. Previously SelectionDAGBuilder used ABI alignment for compressstore/expandload. This patch allows SelectionDAGBuilder to use parameter alignment like memory vp intrinsics and still uses ABI alignment for them when they don't have alignment attributes. --- .../SelectionDAG/SelectionDAGBuilder.cpp | 4 +- .../CodeGen/X86/masked_compressstore_isel.ll | 21 +++++++++- .../CodeGen/X86/masked_expandload_isel.ll | 42 +++++++++++++++++++ 3 files changed, 64 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/X86/masked_expandload_isel.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 48476b0ef9705..e0764ef731e8e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4734,7 +4734,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, Src0 = I.getArgOperand(0); Ptr = I.getArgOperand(1); Mask = I.getArgOperand(2); - Alignment = std::nullopt; + Alignment = I.getParamAlign(1).valueOrOne(); }; Value *PtrOperand, *MaskOperand, *Src0Operand; @@ -4898,7 +4898,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { MaybeAlign &Alignment) { // @llvm.masked.expandload.*(Ptr, Mask, Src0) Ptr = I.getArgOperand(0); - Alignment = std::nullopt; + Alignment = I.getParamAlign(0).valueOrOne(); Mask = I.getArgOperand(1); Src0 = I.getArgOperand(2); }; diff --git a/llvm/test/CodeGen/X86/masked_compressstore_isel.ll b/llvm/test/CodeGen/X86/masked_compressstore_isel.ll index 0587434bff255..2a557ac9b97b3 100644 --- a/llvm/test/CodeGen/X86/masked_compressstore_isel.ll +++ b/llvm/test/CodeGen/X86/masked_compressstore_isel.ll @@ -11,7 +11,7 @@ define void 
@_Z3fooiPiPs(<8 x i32> %gepload, <8 x i1> %0) #0 { ; CHECK-NEXT: [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15 ; CHECK-NEXT: [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]] ; CHECK-NEXT: [[VPMOVDWZ256rr:%[0-9]+]]:vr128x = VPMOVDWZ256rr [[COPY1]] - ; CHECK-NEXT: VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed [[VPMOVW2MZ128rr]], killed [[VPMOVDWZ256rr]] :: (store unknown-size into `ptr null`, align 16) + ; CHECK-NEXT: VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed [[VPMOVW2MZ128rr]], killed [[VPMOVDWZ256rr]] :: (store unknown-size into `ptr null`, align 1) ; CHECK-NEXT: RET 0 entry: %1 = trunc <8 x i32> %gepload to <8 x i16> @@ -19,6 +19,25 @@ entry: ret void } + +define void @_Z3foo2iPiPs(<8 x i32> %gepload, <8 x i1> %0) #0 { + ; CHECK-LABEL: name: _Z3foo2iPiPs + ; CHECK: bb.0.entry: + ; CHECK-NEXT: liveins: $ymm0, $xmm1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr128x = COPY $xmm1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr256x = COPY $ymm0 + ; CHECK-NEXT: [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15 + ; CHECK-NEXT: [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]] + ; CHECK-NEXT: [[VPMOVDWZ256rr:%[0-9]+]]:vr128x = VPMOVDWZ256rr [[COPY1]] + ; CHECK-NEXT: VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed [[VPMOVW2MZ128rr]], killed [[VPMOVDWZ256rr]] :: (store unknown-size into `ptr null`, align 16) + ; CHECK-NEXT: RET 0 +entry: + %1 = trunc <8 x i32> %gepload to <8 x i16> + tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %1, ptr align 16 null, <8 x i1> %0) + ret void +} + ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write) declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr nocapture, <8 x i1>) #1 diff --git a/llvm/test/CodeGen/X86/masked_expandload_isel.ll b/llvm/test/CodeGen/X86/masked_expandload_isel.ll new file mode 100644 index 0000000000000..b364625a1e6f1 --- /dev/null +++ 
b/llvm/test/CodeGen/X86/masked_expandload_isel.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -start-after=codegenprepare -stop-before finalize-isel | FileCheck %s + +define <8 x i16> @_Z3fooiPiPs(<8 x i16> %src, <8 x i1> %mask) #0 { + ; CHECK-LABEL: name: _Z3fooiPiPs + ; CHECK: bb.0.entry: + ; CHECK-NEXT: liveins: $xmm0, $xmm1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr128x = COPY $xmm1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr128x = COPY $xmm0 + ; CHECK-NEXT: [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15 + ; CHECK-NEXT: [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]] + ; CHECK-NEXT: [[VPEXPANDWZ128rmk:%[0-9]+]]:vr128x = VPEXPANDWZ128rmk [[COPY1]], killed [[VPMOVW2MZ128rr]], $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 1) + ; CHECK-NEXT: $xmm0 = COPY [[VPEXPANDWZ128rmk]] + ; CHECK-NEXT: RET 0, $xmm0 +entry: + %res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr null, <8 x i1> %mask, <8 x i16> %src) + ret <8 x i16> %res +} + +define <8 x i16> @_Z3foo2iPiPs(<8 x i16> %src, <8 x i1> %mask) #0 { + ; CHECK-LABEL: name: _Z3foo2iPiPs + ; CHECK: bb.0.entry: + ; CHECK-NEXT: liveins: $xmm0, $xmm1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr128x = COPY $xmm1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr128x = COPY $xmm0 + ; CHECK-NEXT: [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15 + ; CHECK-NEXT: [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]] + ; CHECK-NEXT: [[VPEXPANDWZ128rmk:%[0-9]+]]:vr128x = VPEXPANDWZ128rmk [[COPY1]], killed [[VPMOVW2MZ128rr]], $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 16) + ; CHECK-NEXT: $xmm0 = COPY [[VPEXPANDWZ128rmk]] + ; CHECK-NEXT: RET 0, $xmm0 +entry: + %res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr align 16 null, <8 x i1> %mask, <8 x i16> %src) + ret <8 x 
i16> %res +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write) +declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>) + +attributes #0 = { "target-cpu"="icelake-server" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } From 6322e3d0486c805d1e1c90381e7d8018a021bbef Mon Sep 17 00:00:00 2001 From: Yeting Kuo Date: Tue, 5 Mar 2024 11:07:28 +0800 Subject: [PATCH 2/2] Use Align instead of MaybeAlign. --- .../SelectionDAG/SelectionDAGBuilder.cpp | 25 ++++++++----------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e0764ef731e8e..d6ab0cf6df876 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4721,15 +4721,15 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, SDLoc sdl = getCurSDLoc(); auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, - MaybeAlign &Alignment) { + Align &Alignment) { // llvm.masked.store.*(Src0, Ptr, alignment, Mask) Src0 = I.getArgOperand(0); Ptr = I.getArgOperand(1); - Alignment = cast(I.getArgOperand(2))->getMaybeAlignValue(); + Alignment = cast(I.getArgOperand(2))->getAlignValue(); Mask = I.getArgOperand(3); }; auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, - MaybeAlign &Alignment) { + Align &Alignment) { // llvm.masked.compressstore.*(Src0, Ptr, Mask) Src0 = I.getArgOperand(0); Ptr = I.getArgOperand(1); @@ -4738,7 +4738,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, }; Value *PtrOperand, *MaskOperand, *Src0Operand; - MaybeAlign Alignment; + Align Alignment; if (IsCompressing) getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment); else @@ -4750,12 +4750,10 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, SDValue Offset = 
DAG.getUNDEF(Ptr.getValueType()); EVT VT = Src0.getValueType(); - if (!Alignment) - Alignment = DAG.getEVTAlign(VT); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, - MemoryLocation::UnknownSize, *Alignment, I.getAAMetadata()); + MemoryLocation::UnknownSize, Alignment, I.getAAMetadata()); SDValue StoreNode = DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO, ISD::UNINDEXED, false /* Truncating */, IsCompressing); @@ -4887,15 +4885,15 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDLoc sdl = getCurSDLoc(); auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, - MaybeAlign &Alignment) { + Align &Alignment) { // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) Ptr = I.getArgOperand(0); - Alignment = cast(I.getArgOperand(1))->getMaybeAlignValue(); + Alignment = cast(I.getArgOperand(1))->getAlignValue(); Mask = I.getArgOperand(2); Src0 = I.getArgOperand(3); }; auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, - MaybeAlign &Alignment) { + Align &Alignment) { // @llvm.masked.expandload.*(Ptr, Mask, Src0) Ptr = I.getArgOperand(0); Alignment = I.getParamAlign(0).valueOrOne(); @@ -4904,7 +4902,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { }; Value *PtrOperand, *MaskOperand, *Src0Operand; - MaybeAlign Alignment; + Align Alignment; if (IsExpanding) getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment); else @@ -4916,9 +4914,6 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT VT = Src0.getValueType(); - if (!Alignment) - Alignment = DAG.getEVTAlign(VT); - AAMDNodes AAInfo = I.getAAMetadata(); const MDNode *Ranges = getRangeMetadata(I); @@ -4930,7 +4925,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { MachineMemOperand 
*MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, - MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); + MemoryLocation::UnknownSize, Alignment, AAInfo, Ranges); SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,