Skip to content

[DAG] Teach SelectionDAGBuilder to read parameter alignment of compressstore/expandload. #83763

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 12 additions & 17 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4721,24 +4721,24 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
SDLoc sdl = getCurSDLoc();

auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
MaybeAlign &Alignment) {
Align &Alignment) {
// llvm.masked.store.*(Src0, Ptr, alignment, Mask)
Src0 = I.getArgOperand(0);
Ptr = I.getArgOperand(1);
Alignment = cast<ConstantInt>(I.getArgOperand(2))->getMaybeAlignValue();
Alignment = cast<ConstantInt>(I.getArgOperand(2))->getAlignValue();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

were any tests passing 0?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The LangRef tells that the alignment value of masked.load/store should be power of 2, so I think we could ignore the case.

Mask = I.getArgOperand(3);
};
auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
MaybeAlign &Alignment) {
Align &Alignment) {
// llvm.masked.compressstore.*(Src0, Ptr, Mask)
Src0 = I.getArgOperand(0);
Ptr = I.getArgOperand(1);
Mask = I.getArgOperand(2);
Alignment = std::nullopt;
Alignment = I.getParamAlign(1).valueOrOne();
};

Value *PtrOperand, *MaskOperand, *Src0Operand;
MaybeAlign Alignment;
Align Alignment;
if (IsCompressing)
getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
else
Expand All @@ -4750,12 +4750,10 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());

EVT VT = Src0.getValueType();
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);

MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
MemoryLocation::UnknownSize, *Alignment, I.getAAMetadata());
MemoryLocation::UnknownSize, Alignment, I.getAAMetadata());
SDValue StoreNode =
DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
ISD::UNINDEXED, false /* Truncating */, IsCompressing);
Expand Down Expand Up @@ -4887,24 +4885,24 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDLoc sdl = getCurSDLoc();

auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
MaybeAlign &Alignment) {
Align &Alignment) {
// @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
Ptr = I.getArgOperand(0);
Alignment = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
Alignment = cast<ConstantInt>(I.getArgOperand(1))->getAlignValue();
Mask = I.getArgOperand(2);
Src0 = I.getArgOperand(3);
};
auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
MaybeAlign &Alignment) {
Align &Alignment) {
// @llvm.masked.expandload.*(Ptr, Mask, Src0)
Ptr = I.getArgOperand(0);
Alignment = std::nullopt;
Alignment = I.getParamAlign(0).valueOrOne();
Mask = I.getArgOperand(1);
Src0 = I.getArgOperand(2);
};

Value *PtrOperand, *MaskOperand, *Src0Operand;
MaybeAlign Alignment;
Align Alignment;
if (IsExpanding)
getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
else
Expand All @@ -4916,9 +4914,6 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());

EVT VT = Src0.getValueType();
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);

AAMDNodes AAInfo = I.getAAMetadata();
const MDNode *Ranges = getRangeMetadata(I);

Expand All @@ -4930,7 +4925,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {

MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
MemoryLocation::UnknownSize, Alignment, AAInfo, Ranges);

SDValue Load =
DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,
Expand Down
21 changes: 20 additions & 1 deletion llvm/test/CodeGen/X86/masked_compressstore_isel.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,33 @@ define void @_Z3fooiPiPs(<8 x i32> %gepload, <8 x i1> %0) #0 {
; CHECK-NEXT: [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15
; CHECK-NEXT: [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]]
; CHECK-NEXT: [[VPMOVDWZ256rr:%[0-9]+]]:vr128x = VPMOVDWZ256rr [[COPY1]]
; CHECK-NEXT: VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed [[VPMOVW2MZ128rr]], killed [[VPMOVDWZ256rr]] :: (store unknown-size into `ptr null`, align 16)
; CHECK-NEXT: VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed [[VPMOVW2MZ128rr]], killed [[VPMOVDWZ256rr]] :: (store unknown-size into `ptr null`, align 1)
; CHECK-NEXT: RET 0
entry:
%1 = trunc <8 x i32> %gepload to <8 x i16>
tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %1, ptr null, <8 x i1> %0)
ret void
}


define void @_Z3foo2iPiPs(<8 x i32> %gepload, <8 x i1> %0) #0 {
; CHECK-LABEL: name: _Z3foo2iPiPs
; CHECK: bb.0.entry:
; CHECK-NEXT: liveins: $ymm0, $xmm1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vr128x = COPY $xmm1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr256x = COPY $ymm0
; CHECK-NEXT: [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15
; CHECK-NEXT: [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]]
; CHECK-NEXT: [[VPMOVDWZ256rr:%[0-9]+]]:vr128x = VPMOVDWZ256rr [[COPY1]]
; CHECK-NEXT: VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed [[VPMOVW2MZ128rr]], killed [[VPMOVDWZ256rr]] :: (store unknown-size into `ptr null`, align 16)
; CHECK-NEXT: RET 0
entry:
%1 = trunc <8 x i32> %gepload to <8 x i16>
tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %1, ptr align 16 null, <8 x i1> %0)
ret void
}

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr nocapture, <8 x i1>) #1

Expand Down
42 changes: 42 additions & 0 deletions llvm/test/CodeGen/X86/masked_expandload_isel.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -start-after=codegenprepare -stop-before finalize-isel | FileCheck %s
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do you need -start-after?

Copy link
Contributor Author

@yetingk yetingk Mar 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file is almost copied from originally existed file llvm/test/CodeGen/X86/masked_compressstore_isel.ll I think the reason is to avoid ScalarizeMaskedMemIntrin pass scalarizing the intrinsic.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't that not happen if it's legal for the target?

Copy link
Contributor Author

@yetingk yetingk Mar 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. it doesn't happened for this case. I just guessed the reason about that.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use update_mir_test_checks.py. I'll fix the compress store test to use the script and we can rebase this on top of that

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please rebase after bdfebc3

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rebased.


define <8 x i16> @_Z3fooiPiPs(<8 x i16> %src, <8 x i1> %mask) #0 {
; CHECK-LABEL: name: _Z3fooiPiPs
; CHECK: bb.0.entry:
; CHECK-NEXT: liveins: $xmm0, $xmm1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vr128x = COPY $xmm1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr128x = COPY $xmm0
; CHECK-NEXT: [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15
; CHECK-NEXT: [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]]
; CHECK-NEXT: [[VPEXPANDWZ128rmk:%[0-9]+]]:vr128x = VPEXPANDWZ128rmk [[COPY1]], killed [[VPMOVW2MZ128rr]], $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 1)
; CHECK-NEXT: $xmm0 = COPY [[VPEXPANDWZ128rmk]]
; CHECK-NEXT: RET 0, $xmm0
entry:
%res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr null, <8 x i1> %mask, <8 x i16> %src)
ret <8 x i16> %res
}

define <8 x i16> @_Z3foo2iPiPs(<8 x i16> %src, <8 x i1> %mask) #0 {
; CHECK-LABEL: name: _Z3foo2iPiPs
; CHECK: bb.0.entry:
; CHECK-NEXT: liveins: $xmm0, $xmm1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vr128x = COPY $xmm1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr128x = COPY $xmm0
; CHECK-NEXT: [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15
; CHECK-NEXT: [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]]
; CHECK-NEXT: [[VPEXPANDWZ128rmk:%[0-9]+]]:vr128x = VPEXPANDWZ128rmk [[COPY1]], killed [[VPMOVW2MZ128rr]], $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 16)
; CHECK-NEXT: $xmm0 = COPY [[VPEXPANDWZ128rmk]]
; CHECK-NEXT: RET 0, $xmm0
entry:
%res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr align 16 null, <8 x i1> %mask, <8 x i16> %src)
ret <8 x i16> %res
}

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>)

attributes #0 = { "target-cpu"="icelake-server" }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }