Skip to content

Commit d95a0d7

Browse files
authored
[DAG] Teach SelectionDAGBuilder to read parameter alignment of compressstore/expandload. (#83763)
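Previously, SelectionDAGBuilder used the ABI alignment of the vector type for compressstore/expandload. This patch teaches SelectionDAGBuilder to use the alignment attribute on the pointer parameter instead, as is already done for vp intrinsics. Unlike the original code, it does not fall back to the vector type's alignment when no attribute is present; it defaults to an alignment of 1, since these intrinsics may be used on memory that is not aligned to the vector type.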
1 parent c00c901 commit d95a0d7

File tree

3 files changed

+74
-18
lines changed

3 files changed

+74
-18
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4721,24 +4721,24 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
47214721
SDLoc sdl = getCurSDLoc();
47224722

47234723
auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
4724-
MaybeAlign &Alignment) {
4724+
Align &Alignment) {
47254725
// llvm.masked.store.*(Src0, Ptr, alignment, Mask)
47264726
Src0 = I.getArgOperand(0);
47274727
Ptr = I.getArgOperand(1);
4728-
Alignment = cast<ConstantInt>(I.getArgOperand(2))->getMaybeAlignValue();
4728+
Alignment = cast<ConstantInt>(I.getArgOperand(2))->getAlignValue();
47294729
Mask = I.getArgOperand(3);
47304730
};
47314731
auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
4732-
MaybeAlign &Alignment) {
4732+
Align &Alignment) {
47334733
// llvm.masked.compressstore.*(Src0, Ptr, Mask)
47344734
Src0 = I.getArgOperand(0);
47354735
Ptr = I.getArgOperand(1);
47364736
Mask = I.getArgOperand(2);
4737-
Alignment = std::nullopt;
4737+
Alignment = I.getParamAlign(1).valueOrOne();
47384738
};
47394739

47404740
Value *PtrOperand, *MaskOperand, *Src0Operand;
4741-
MaybeAlign Alignment;
4741+
Align Alignment;
47424742
if (IsCompressing)
47434743
getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
47444744
else
@@ -4750,12 +4750,10 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
47504750
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
47514751

47524752
EVT VT = Src0.getValueType();
4753-
if (!Alignment)
4754-
Alignment = DAG.getEVTAlign(VT);
47554753

47564754
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
47574755
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
4758-
MemoryLocation::UnknownSize, *Alignment, I.getAAMetadata());
4756+
MemoryLocation::UnknownSize, Alignment, I.getAAMetadata());
47594757
SDValue StoreNode =
47604758
DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
47614759
ISD::UNINDEXED, false /* Truncating */, IsCompressing);
@@ -4887,24 +4885,24 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
48874885
SDLoc sdl = getCurSDLoc();
48884886

48894887
auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
4890-
MaybeAlign &Alignment) {
4888+
Align &Alignment) {
48914889
// @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
48924890
Ptr = I.getArgOperand(0);
4893-
Alignment = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
4891+
Alignment = cast<ConstantInt>(I.getArgOperand(1))->getAlignValue();
48944892
Mask = I.getArgOperand(2);
48954893
Src0 = I.getArgOperand(3);
48964894
};
48974895
auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
4898-
MaybeAlign &Alignment) {
4896+
Align &Alignment) {
48994897
// @llvm.masked.expandload.*(Ptr, Mask, Src0)
49004898
Ptr = I.getArgOperand(0);
4901-
Alignment = std::nullopt;
4899+
Alignment = I.getParamAlign(0).valueOrOne();
49024900
Mask = I.getArgOperand(1);
49034901
Src0 = I.getArgOperand(2);
49044902
};
49054903

49064904
Value *PtrOperand, *MaskOperand, *Src0Operand;
4907-
MaybeAlign Alignment;
4905+
Align Alignment;
49084906
if (IsExpanding)
49094907
getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
49104908
else
@@ -4916,9 +4914,6 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
49164914
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
49174915

49184916
EVT VT = Src0.getValueType();
4919-
if (!Alignment)
4920-
Alignment = DAG.getEVTAlign(VT);
4921-
49224917
AAMDNodes AAInfo = I.getAAMetadata();
49234918
const MDNode *Ranges = getRangeMetadata(I);
49244919

@@ -4930,7 +4925,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
49304925

49314926
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
49324927
MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
4933-
MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
4928+
MemoryLocation::UnknownSize, Alignment, AAInfo, Ranges);
49344929

49354930
SDValue Load =
49364931
DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,

llvm/test/CodeGen/X86/masked_compressstore_isel.ll

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,33 @@ define void @_Z3fooiPiPs(<8 x i32> %gepload, <8 x i1> %0) #0 {
1111
; CHECK-NEXT: [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15
1212
; CHECK-NEXT: [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]]
1313
; CHECK-NEXT: [[VPMOVDWZ256rr:%[0-9]+]]:vr128x = VPMOVDWZ256rr [[COPY1]]
14-
; CHECK-NEXT: VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed [[VPMOVW2MZ128rr]], killed [[VPMOVDWZ256rr]] :: (store unknown-size into `ptr null`, align 16)
14+
; CHECK-NEXT: VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed [[VPMOVW2MZ128rr]], killed [[VPMOVDWZ256rr]] :: (store unknown-size into `ptr null`, align 1)
1515
; CHECK-NEXT: RET 0
1616
entry:
1717
%1 = trunc <8 x i32> %gepload to <8 x i16>
1818
tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %1, ptr null, <8 x i1> %0)
1919
ret void
2020
}
2121

22+
23+
define void @_Z3foo2iPiPs(<8 x i32> %gepload, <8 x i1> %0) #0 {
24+
; CHECK-LABEL: name: _Z3foo2iPiPs
25+
; CHECK: bb.0.entry:
26+
; CHECK-NEXT: liveins: $ymm0, $xmm1
27+
; CHECK-NEXT: {{ $}}
28+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vr128x = COPY $xmm1
29+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr256x = COPY $ymm0
30+
; CHECK-NEXT: [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15
31+
; CHECK-NEXT: [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]]
32+
; CHECK-NEXT: [[VPMOVDWZ256rr:%[0-9]+]]:vr128x = VPMOVDWZ256rr [[COPY1]]
33+
; CHECK-NEXT: VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed [[VPMOVW2MZ128rr]], killed [[VPMOVDWZ256rr]] :: (store unknown-size into `ptr null`, align 16)
34+
; CHECK-NEXT: RET 0
35+
entry:
36+
%1 = trunc <8 x i32> %gepload to <8 x i16>
37+
tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %1, ptr align 16 null, <8 x i1> %0)
38+
ret void
39+
}
40+
2241
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
2342
declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr nocapture, <8 x i1>) #1
2443

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -start-after=codegenprepare -stop-before finalize-isel | FileCheck %s
3+
4+
define <8 x i16> @_Z3fooiPiPs(<8 x i16> %src, <8 x i1> %mask) #0 {
5+
; CHECK-LABEL: name: _Z3fooiPiPs
6+
; CHECK: bb.0.entry:
7+
; CHECK-NEXT: liveins: $xmm0, $xmm1
8+
; CHECK-NEXT: {{ $}}
9+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vr128x = COPY $xmm1
10+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr128x = COPY $xmm0
11+
; CHECK-NEXT: [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15
12+
; CHECK-NEXT: [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]]
13+
; CHECK-NEXT: [[VPEXPANDWZ128rmk:%[0-9]+]]:vr128x = VPEXPANDWZ128rmk [[COPY1]], killed [[VPMOVW2MZ128rr]], $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 1)
14+
; CHECK-NEXT: $xmm0 = COPY [[VPEXPANDWZ128rmk]]
15+
; CHECK-NEXT: RET 0, $xmm0
16+
entry:
17+
%res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr null, <8 x i1> %mask, <8 x i16> %src)
18+
ret <8 x i16> %res
19+
}
20+
21+
define <8 x i16> @_Z3foo2iPiPs(<8 x i16> %src, <8 x i1> %mask) #0 {
22+
; CHECK-LABEL: name: _Z3foo2iPiPs
23+
; CHECK: bb.0.entry:
24+
; CHECK-NEXT: liveins: $xmm0, $xmm1
25+
; CHECK-NEXT: {{ $}}
26+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vr128x = COPY $xmm1
27+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr128x = COPY $xmm0
28+
; CHECK-NEXT: [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15
29+
; CHECK-NEXT: [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]]
30+
; CHECK-NEXT: [[VPEXPANDWZ128rmk:%[0-9]+]]:vr128x = VPEXPANDWZ128rmk [[COPY1]], killed [[VPMOVW2MZ128rr]], $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 16)
31+
; CHECK-NEXT: $xmm0 = COPY [[VPEXPANDWZ128rmk]]
32+
; CHECK-NEXT: RET 0, $xmm0
33+
entry:
34+
%res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr align 16 null, <8 x i1> %mask, <8 x i16> %src)
35+
ret <8 x i16> %res
36+
}
37+
38+
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
39+
declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>)
40+
41+
attributes #0 = { "target-cpu"="icelake-server" }
42+
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }

0 commit comments

Comments
 (0)