Skip to content

Commit a5c9304

Browse files
author
Yeting Kuo
committed
[DAG] Teach SelectionDAGBuilder to read parameter alignment of compressstore/expandload.
Previously, SelectionDAGBuilder used the ABI alignment for compressstore/expandload. This patch allows SelectionDAGBuilder to use the parameter alignment, like the memory vp intrinsics, and still uses the ABI alignment for them when they don't have alignment attributes.
1 parent 1a67dee commit a5c9304

File tree

3 files changed

+64
-3
lines changed

3 files changed

+64
-3
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4734,7 +4734,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
47344734
Src0 = I.getArgOperand(0);
47354735
Ptr = I.getArgOperand(1);
47364736
Mask = I.getArgOperand(2);
4737-
Alignment = std::nullopt;
4737+
Alignment = I.getParamAlign(1).valueOrOne();
47384738
};
47394739

47404740
Value *PtrOperand, *MaskOperand, *Src0Operand;
@@ -4898,7 +4898,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
48984898
MaybeAlign &Alignment) {
48994899
// @llvm.masked.expandload.*(Ptr, Mask, Src0)
49004900
Ptr = I.getArgOperand(0);
4901-
Alignment = std::nullopt;
4901+
Alignment = I.getParamAlign(0).valueOrOne();
49024902
Mask = I.getArgOperand(1);
49034903
Src0 = I.getArgOperand(2);
49044904
};

llvm/test/CodeGen/X86/masked_compressstore_isel.ll

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,33 @@ define void @_Z3fooiPiPs(<8 x i32> %gepload, <8 x i1> %0) #0 {
1111
; CHECK-NEXT: [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15
1212
; CHECK-NEXT: [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]]
1313
; CHECK-NEXT: [[VPMOVDWZ256rr:%[0-9]+]]:vr128x = VPMOVDWZ256rr [[COPY1]]
14-
; CHECK-NEXT: VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed [[VPMOVW2MZ128rr]], killed [[VPMOVDWZ256rr]] :: (store unknown-size into `ptr null`, align 16)
14+
; CHECK-NEXT: VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed [[VPMOVW2MZ128rr]], killed [[VPMOVDWZ256rr]] :: (store unknown-size into `ptr null`, align 1)
1515
; CHECK-NEXT: RET 0
1616
entry:
1717
%1 = trunc <8 x i32> %gepload to <8 x i16>
1818
tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %1, ptr null, <8 x i1> %0)
1919
ret void
2020
}
2121

22+
23+
define void @_Z3foo2iPiPs(<8 x i32> %gepload, <8 x i1> %0) #0 {
24+
; CHECK-LABEL: name: _Z3foo2iPiPs
25+
; CHECK: bb.0.entry:
26+
; CHECK-NEXT: liveins: $ymm0, $xmm1
27+
; CHECK-NEXT: {{ $}}
28+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vr128x = COPY $xmm1
29+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr256x = COPY $ymm0
30+
; CHECK-NEXT: [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15
31+
; CHECK-NEXT: [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]]
32+
; CHECK-NEXT: [[VPMOVDWZ256rr:%[0-9]+]]:vr128x = VPMOVDWZ256rr [[COPY1]]
33+
; CHECK-NEXT: VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed [[VPMOVW2MZ128rr]], killed [[VPMOVDWZ256rr]] :: (store unknown-size into `ptr null`, align 16)
34+
; CHECK-NEXT: RET 0
35+
entry:
36+
%1 = trunc <8 x i32> %gepload to <8 x i16>
37+
tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %1, ptr align 16 null, <8 x i1> %0)
38+
ret void
39+
}
40+
2241
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
2342
declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr nocapture, <8 x i1>) #1
2443

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -start-after=codegenprepare -stop-before finalize-isel | FileCheck %s
3+
4+
define <8 x i16> @_Z3fooiPiPs(<8 x i16> %src, <8 x i1> %mask) #0 {
5+
; CHECK-LABEL: name: _Z3fooiPiPs
6+
; CHECK: bb.0.entry:
7+
; CHECK-NEXT: liveins: $xmm0, $xmm1
8+
; CHECK-NEXT: {{ $}}
9+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vr128x = COPY $xmm1
10+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr128x = COPY $xmm0
11+
; CHECK-NEXT: [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15
12+
; CHECK-NEXT: [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]]
13+
; CHECK-NEXT: [[VPEXPANDWZ128rmk:%[0-9]+]]:vr128x = VPEXPANDWZ128rmk [[COPY1]], killed [[VPMOVW2MZ128rr]], $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 1)
14+
; CHECK-NEXT: $xmm0 = COPY [[VPEXPANDWZ128rmk]]
15+
; CHECK-NEXT: RET 0, $xmm0
16+
entry:
17+
%res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr null, <8 x i1> %mask, <8 x i16> %src)
18+
ret <8 x i16> %res
19+
}
20+
21+
define <8 x i16> @_Z3foo2iPiPs(<8 x i16> %src, <8 x i1> %mask) #0 {
22+
; CHECK-LABEL: name: _Z3foo2iPiPs
23+
; CHECK: bb.0.entry:
24+
; CHECK-NEXT: liveins: $xmm0, $xmm1
25+
; CHECK-NEXT: {{ $}}
26+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vr128x = COPY $xmm1
27+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr128x = COPY $xmm0
28+
; CHECK-NEXT: [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15
29+
; CHECK-NEXT: [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]]
30+
; CHECK-NEXT: [[VPEXPANDWZ128rmk:%[0-9]+]]:vr128x = VPEXPANDWZ128rmk [[COPY1]], killed [[VPMOVW2MZ128rr]], $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 16)
31+
; CHECK-NEXT: $xmm0 = COPY [[VPEXPANDWZ128rmk]]
32+
; CHECK-NEXT: RET 0, $xmm0
33+
entry:
34+
%res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr align 16 null, <8 x i1> %mask, <8 x i16> %src)
35+
ret <8 x i16> %res
36+
}
37+
38+
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
39+
declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>)
40+
41+
attributes #0 = { "target-cpu"="icelake-server" }
42+
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }

0 commit comments

Comments
 (0)