Skip to content

Commit 7e2d603

Browse files
Handle scalable store size in MemCpyOptimizer
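MemCpyOpt crashes with an internal compiler error (ICE) when it tries to create a `memset` with a scalable size, e.g. when a store of an aggregate of scalable vectors is converted to a `memset`.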
1 parent 5cbc421 commit 7e2d603

File tree

2 files changed

+58
-1
lines changed

2 files changed

+58
-1
lines changed

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -800,8 +800,9 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
800 800
// in subsequent passes.
801 801
auto *T = V->getType();
802 802
if (T->isAggregateType()) {
803-
uint64_t Size = DL.getTypeStoreSize(T);
804 803
IRBuilder<> Builder(SI);
804+
Value *Size =
805+
Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T));
805 806
auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
806 807
SI->getAlign());
807 808
M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=memcpyopt < %s | FileCheck %s
3+
target triple = "aarch64-unknown-linux"
4+
5+
define void @f0() {
6+
; CHECK-LABEL: define void @f0() {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: [[P:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
9+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
10+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
11+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[P]], i8 0, i64 [[TMP1]], i1 false)
12+
; CHECK-NEXT: call void @g(ptr [[P]])
13+
; CHECK-NEXT: ret void
14+
;
15+
entry:
16+
%p = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>}, align 2
17+
store { <vscale x 16 x i1>, <vscale x 16 x i1> } zeroinitializer, ptr %p, align 2
18+
call void @g(ptr %p)
19+
ret void
20+
}
21+
22+
define void @f1() {
23+
; CHECK-LABEL: define void @f1() {
24+
; CHECK-NEXT: [[ENTRY:.*:]]
25+
; CHECK-NEXT: [[P:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
26+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
27+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 48
28+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[P]], i8 0, i64 [[TMP1]], i1 false)
29+
; CHECK-NEXT: call void @g(ptr [[P]])
30+
; CHECK-NEXT: ret void
31+
;
32+
entry:
33+
%p = alloca {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
34+
store {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } zeroinitializer, ptr %p, align 16
35+
call void @g(ptr %p)
36+
ret void
37+
}
38+
39+
define void @f2() {
40+
; CHECK-LABEL: define void @f2() {
41+
; CHECK-NEXT: [[ENTRY:.*:]]
42+
; CHECK-NEXT: [[P:%.*]] = alloca { <vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double> }, align 16
43+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
44+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 192
45+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[P]], i8 0, i64 [[TMP1]], i1 false)
46+
; CHECK-NEXT: call void @g(ptr [[P]])
47+
; CHECK-NEXT: ret void
48+
;
49+
entry:
50+
%p = alloca {<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double> }, align 16
51+
store {<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double> } zeroinitializer, ptr %p, align 16
52+
call void @g(ptr %p)
53+
ret void
54+
}
55+
56+
declare void @g(ptr)

0 commit comments

Comments
 (0)