-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[DAG] Teach SelectionDAGBuilder to read parameter alignment of compressstore/expandload. #83763
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-backend-x86 Author: Yeting Kuo (yetingk) ChangesPreviously SelectionDAGBuilder used ABI alignment for compressstore/expandload. This patch allows SelectionDAGBuilder to use parameter alignment like vp intrinsics and stills uses ABI alignment for them when they don't have alignment attriubtes. Full diff: https://github.com/llvm/llvm-project/pull/83763.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index ab2f42d2024ccc..39628f4fb3689d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4582,6 +4582,8 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
Ptr = I.getArgOperand(1);
Mask = I.getArgOperand(2);
Alignment = std::nullopt;
+ if (MaybeAlign Align = I.getParamAlign(1))
+ Alignment = Align;
};
Value *PtrOperand, *MaskOperand, *Src0Operand;
@@ -4746,6 +4748,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
// @llvm.masked.expandload.*(Ptr, Mask, Src0)
Ptr = I.getArgOperand(0);
Alignment = std::nullopt;
+ if (MaybeAlign Align = I.getParamAlign(0))
+ Alignment = Align;
Mask = I.getArgOperand(1);
Src0 = I.getArgOperand(2);
};
diff --git a/llvm/test/CodeGen/X86/masked_compressstore_isel.ll b/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
index 1851a21c8c0641..b5857b22382da0 100644
--- a/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
+++ b/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
@@ -7,7 +7,7 @@ entry:
ret void
}
-; CHECK-LABEL: bb.0.entry:
+; CHECK-LABEL: name: _Z3fooiPiPs
; CHECK: %1:vr128x = COPY $xmm1
; CHECK-NEXT: %0:vr256x = COPY $ymm0
; CHECK-NEXT: %2:vr128x = VPSLLWZ128ri %1, 15
@@ -16,6 +16,22 @@ entry:
; CHECK-NEXT: VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed %3, killed %4 :: (store unknown-size into `ptr null`, align 16)
; CHECK-NEXT: RET 0
+define void @_Z3foo2iPiPs(<8 x i32> %gepload, <8 x i1> %0) #0 {
+entry:
+ %1 = trunc <8 x i32> %gepload to <8 x i16>
+ tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %1, ptr align 32 null, <8 x i1> %0)
+ ret void
+}
+
+; CHECK-LABEL: name: _Z3foo2iPiPs
+; CHECK: %1:vr128x = COPY $xmm1
+; CHECK-NEXT: %0:vr256x = COPY $ymm0
+; CHECK-NEXT: %2:vr128x = VPSLLWZ128ri %1, 15
+; CHECK-NEXT: %3:vk16wm = VPMOVW2MZ128rr killed %2
+; CHECK-NEXT: %4:vr128x = VPMOVDWZ256rr %0
+; CHECK-NEXT: VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed %3, killed %4 :: (store unknown-size into `ptr null`, align 32)
+; CHECK-NEXT: RET 0
+
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr nocapture, <8 x i1>) #1
diff --git a/llvm/test/CodeGen/X86/masked_expandload_isel.ll b/llvm/test/CodeGen/X86/masked_expandload_isel.ll
new file mode 100644
index 00000000000000..7f20e03d343cdb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/masked_expandload_isel.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -start-after=codegenprepare -stop-before finalize-isel | FileCheck %s
+
+define <8 x i16> @_Z3fooiPiPs(<8 x i16> %src, <8 x i1> %0) #0 {
+entry:
+ %res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr null, <8 x i1> %0, <8 x i16> %src)
+ ret <8 x i16> %res
+}
+
+; CHECK-LABEL: name: _Z3fooiPiPs
+; CHECK: %1:vr128x = COPY $xmm1
+; CHECK-NEXT: %0:vr128x = COPY $xmm0
+; CHECK-NEXT: %2:vr128x = VPSLLWZ128ri %1, 15
+; CHECK-NEXT: %3:vk16wm = VPMOVW2MZ128rr killed %2
+; CHECK-NEXT: %4:vr128x = VPEXPANDWZ128rmk %0, killed %3, $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 16)
+
+define <8 x i16> @_Z3foo2iPiPs(<8 x i16> %src, <8 x i1> %0) #0 {
+entry:
+ %res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr align 32 null, <8 x i1> %0, <8 x i16> %src)
+ ret <8 x i16> %res
+}
+
+; CHECK-LABEL: name: _Z3foo2iPiPs
+; CHECK: %1:vr128x = COPY $xmm1
+; CHECK-NEXT: %0:vr128x = COPY $xmm0
+; CHECK-NEXT: %2:vr128x = VPSLLWZ128ri %1, 15
+; CHECK-NEXT: %3:vk16wm = VPMOVW2MZ128rr killed %2
+; CHECK-NEXT: %4:vr128x = VPEXPANDWZ128rmk %0, killed %3, $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 32)
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
+declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>)
+
+attributes #0 = { "target-cpu"="icelake-server" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }
|
if (MaybeAlign Align = I.getParamAlign(0)) | ||
Alignment = Align; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can just assign Alignment from I.getParamAlign?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should be using I.getParamAlign(0).getValueOr(1)
. The call to DAG.getEVTAlign(VT)
later doesn't make sense for a compressed store or expanding load. The vector type isn't really relevant. Maybe we would use the alignment for the element type, but 1 is conservatively correct.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To clarify the reason I think the vector type isn't relevant, is that we are like not accessing a full vector of memory. If this intrinsic is used in a loop, the pointer for the next iteration will likely have advanced less than a full vector making it not aligned.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I had addressed Craig's comment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
MaybeAlign shouldn't exist. We should just make all of the contexts behave like getValueOr1
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I had already replaced MaybeAlign by Align in 6322e3d
@@ -0,0 +1,33 @@ | |||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -start-after=codegenprepare -stop-before finalize-isel | FileCheck %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why do you need -start-after?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This file is almost copied from originally existed file llvm/test/CodeGen/X86/masked_compressstore_isel.ll I think the reason is to avoid ScalarizeMaskedMemIntrin pass scalarizing the intrinsic.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wouldn't that not happen if it's legal for the target?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. it doesn't happened for this case. I just guessed the reason about that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use update_mir_test_checks.py. I'll fix the compress store test to use the script and we can rebase this on top of that
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please rebase after bdfebc3
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Rebased.
@@ -0,0 +1,33 @@ | |||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -start-after=codegenprepare -stop-before finalize-isel | FileCheck %s | |||
|
|||
define <8 x i16> @_Z3fooiPiPs(<8 x i16> %src, <8 x i1> %0) #0 { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use named values in tests
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should I also update naming in llvm/test/CodeGen/X86/masked_compressstore_isel.ll?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I had already updated llvm/test/CodeGen/X86/masked_expandload_isel.ll.
It's probably worth documenting the behavior in LangRef, like we do for other intrinsics that support "align" markings. |
…ndload sections. align attribute is used for masked.compress/expandload in commit llvm#83519, llvm#83763, llvm#83516.
; CHECK-NEXT: %2:vr128x = VPSLLWZ128ri %1, 15 | ||
; CHECK-NEXT: %3:vk16wm = VPMOVW2MZ128rr killed %2 | ||
; CHECK-NEXT: %4:vr128x = VPMOVDWZ256rr %0 | ||
; CHECK-NEXT: VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed %3, killed %4 :: (store unknown-size into `ptr null`, align 1) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this have align 16 instead of align 1?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
oh nevermind the check lines are just in a funny spot.
…ssstore/expandload. Previously SelectionDAGBuilder used ABI alignment for compressstore/expandload. This patch allows SelectionDAGBuilder to use parameter alignment like memory vp intrinsics and stills uses ABI alignment for them when they don't have alignment attriubtes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
what happened to the LangRef change? |
// llvm.masked.store.*(Src0, Ptr, alignment, Mask) | ||
Src0 = I.getArgOperand(0); | ||
Ptr = I.getArgOperand(1); | ||
Alignment = cast<ConstantInt>(I.getArgOperand(2))->getMaybeAlignValue(); | ||
Alignment = cast<ConstantInt>(I.getArgOperand(2))->getAlignValue(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
were any tests passing 0?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The LangRef tells that the alignment value of masked.load/store should be power of 2, so I think we could ignore the case.
Sorry, do you mean why I changed LangRef? I changed it because @efriedma-quic asked for. And I opened another PR for this just I think changing ir document is a little exceed the scope of this patch.
|
Previously SelectionDAGBuilder used ABI alignment for compressstore/expandload. This patch allows SelectionDAGBuilder to use parameter alignment like vp intrinsics. This does not follow the original code to default use vector type alignment, since it is possible implemented to unaligned vector alignment.