From 7e2d60348850619fb7b0c8a88e92ab103f907d34 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Fri, 6 Dec 2024 11:08:21 +0000 Subject: [PATCH 1/2] Handle scalable store size in MemCpyOptimizer The compiler crashes with an ICE when it tries to create a `memset` with scalable size. --- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 3 +- .../CodeGen/AArch64/memset-scalable-size.ll | 56 +++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/memset-scalable-size.ll diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 0cba5d077da62..fc5f6ff2b7f37 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -800,8 +800,9 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // in subsequent passes. auto *T = V->getType(); if (T->isAggregateType()) { - uint64_t Size = DL.getTypeStoreSize(T); IRBuilder<> Builder(SI); + Value *Size = + Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T)); auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size, SI->getAlign()); M->copyMetadata(*SI, LLVMContext::MD_DIAssignID); diff --git a/llvm/test/CodeGen/AArch64/memset-scalable-size.ll b/llvm/test/CodeGen/AArch64/memset-scalable-size.ll new file mode 100644 index 0000000000000..8ea6330f235a6 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/memset-scalable-size.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=memcpyopt < %s | FileCheck %s +target triple = "aarch64-unknown-linux" + +define void @f0() { +; CHECK-LABEL: define void @f0() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[P:%.*]] = alloca { , }, align 2 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[P]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: call void @g(ptr [[P]]) +; CHECK-NEXT: ret void +; +entry: + %p = alloca { , }, align 2 + store { , } zeroinitializer, ptr %p, align 2 + call void @g(ptr %p) + ret void +} + +define void @f1() { +; CHECK-LABEL: define void @f1() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[P:%.*]] = alloca { , , }, align 16 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 48 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[P]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: call void @g(ptr [[P]]) +; CHECK-NEXT: ret void +; +entry: + %p = alloca {, , }, align 16 + store {, , } zeroinitializer, ptr %p, align 16 + call void @g(ptr %p) + ret void +} + +define void @f2() { +; CHECK-LABEL: define void @f2() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[P:%.*]] = alloca { , , }, align 16 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 192 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[P]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: call void @g(ptr [[P]]) +; CHECK-NEXT: ret void +; +entry: + %p = alloca {, , }, align 16 + store {, , } zeroinitializer, ptr %p, align 16 + call void @g(ptr %p) + ret void +} + +declare void @g(ptr) From eca59d2a9d990d4dafca1dd9714f9bfeb851ea4c Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Fri, 6 Dec 2024 16:28:38 +0000 Subject: [PATCH 2/2] [fixup] Don't create a call to memset and move test to vscale-memset.ll --- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 67 ++++++++++--------- .../CodeGen/AArch64/memset-scalable-size.ll | 56 ---------------- .../Transforms/MemCpyOpt/vscale-memset.ll | 14 +++- 3 files changed, 48 insertions(+), 89 deletions(-) delete mode 100644 llvm/test/CodeGen/AArch64/memset-scalable-size.ll diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index fc5f6ff2b7f37..bb98b3d1c0725 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -787,44 +787,47 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // Ensure that the value being stored is something that can be memset'able a // byte at a time like "0" or "-1" or any width, as well as things like // 0xA0A0A0A0 and 0.0. - auto *V = SI->getOperand(0); - if (Value *ByteVal = isBytewiseValue(V, DL)) { - if (Instruction *I = - tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) { - BBI = I->getIterator(); // Don't invalidate iterator. - return true; - } + Value *V = SI->getOperand(0); + Value *ByteVal = isBytewiseValue(V, DL); + if (!ByteVal) + return false; - // If we have an aggregate, we try to promote it to memset regardless - // of opportunity for merging as it can expose optimization opportunities - // in subsequent passes. - auto *T = V->getType(); - if (T->isAggregateType()) { - IRBuilder<> Builder(SI); - Value *Size = - Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T)); - auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size, - SI->getAlign()); - M->copyMetadata(*SI, LLVMContext::MD_DIAssignID); + if (Instruction *I = + tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) { + BBI = I->getIterator(); // Don't invalidate iterator. + return true; + } + + // If we have an aggregate, we try to promote it to memset regardless + // of opportunity for merging as it can expose optimization opportunities + // in subsequent passes. + auto *T = V->getType(); + if (!T->isAggregateType()) + return false; - LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n"); + TypeSize Size = DL.getTypeStoreSize(T); + if (Size.isScalable()) + return false; - // The newly inserted memset is immediately overwritten by the original - // store, so we do not need to rename uses. - auto *StoreDef = cast(MSSA->getMemoryAccess(SI)); - auto *NewAccess = MSSAU->createMemoryAccessBefore(M, nullptr, StoreDef); - MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/false); + IRBuilder<> Builder(SI); + auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size, + SI->getAlign()); + M->copyMetadata(*SI, LLVMContext::MD_DIAssignID); - eraseInstruction(SI); - NumMemSetInfer++; + LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n"); - // Make sure we do not invalidate the iterator. - BBI = M->getIterator(); - return true; - } - } + // The newly inserted memset is immediately overwritten by the original + // store, so we do not need to rename uses. + auto *StoreDef = cast(MSSA->getMemoryAccess(SI)); + auto *NewAccess = MSSAU->createMemoryAccessBefore(M, nullptr, StoreDef); + MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/false); - return false; + eraseInstruction(SI); + NumMemSetInfer++; + + // Make sure we do not invalidate the iterator. + BBI = M->getIterator(); + return true; } bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) { diff --git a/llvm/test/CodeGen/AArch64/memset-scalable-size.ll b/llvm/test/CodeGen/AArch64/memset-scalable-size.ll deleted file mode 100644 index 8ea6330f235a6..0000000000000 --- a/llvm/test/CodeGen/AArch64/memset-scalable-size.ll +++ /dev/null @@ -1,56 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S --passes=memcpyopt < %s | FileCheck %s -target triple = "aarch64-unknown-linux" - -define void @f0() { -; CHECK-LABEL: define void @f0() { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[P:%.*]] = alloca { , }, align 2 -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[P]], i8 0, i64 [[TMP1]], i1 false) -; CHECK-NEXT: call void @g(ptr [[P]]) -; CHECK-NEXT: ret void -; -entry: - %p = alloca { , }, align 2 - store { , } zeroinitializer, ptr %p, align 2 - call void @g(ptr %p) - ret void -} - -define void @f1() { -; CHECK-LABEL: define void @f1() { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[P:%.*]] = alloca { , , }, align 16 -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 48 -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[P]], i8 0, i64 [[TMP1]], i1 false) -; CHECK-NEXT: call void @g(ptr [[P]]) -; CHECK-NEXT: ret void -; -entry: - %p = alloca {, , }, align 16 - store {, , } zeroinitializer, ptr %p, align 16 - call void @g(ptr %p) - ret void -} - -define void @f2() { -; CHECK-LABEL: define void @f2() { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[P:%.*]] = alloca { , , }, align 16 -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 192 -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[P]], i8 0, i64 [[TMP1]], i1 false) -; CHECK-NEXT: call void @g(ptr [[P]]) -; CHECK-NEXT: ret void -; -entry: - %p = alloca {, , }, align 16 - store {, , } zeroinitializer, ptr %p, align 16 - call void @g(ptr %p) - ret void -} - -declare void @g(ptr) diff --git a/llvm/test/Transforms/MemCpyOpt/vscale-memset.ll b/llvm/test/Transforms/MemCpyOpt/vscale-memset.ll index b4ab443fdfb68..45de52065cd5c 100644 --- a/llvm/test/Transforms/MemCpyOpt/vscale-memset.ll +++ b/llvm/test/Transforms/MemCpyOpt/vscale-memset.ll @@ -8,7 +8,7 @@ define void @foo(ptr %p) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: store zeroinitializer, ptr [[P:%.*]], align 16 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[P]], i64 1 ; CHECK-NEXT: store zeroinitializer, ptr [[TMP1]], align 16 ; CHECK-NEXT: ret void ; @@ -18,6 +18,18 @@ define void @foo(ptr %p) { ret void } +; Test the compiler does not crash on a store of a scalable aggregate type. +define void @test_no_crash_scalable_agg(ptr %p) { +; CHECK-LABEL: @test_no_crash_scalable_agg( +; CHECK-NEXT: entry: +; CHECK-NEXT: store { , } zeroinitializer, ptr [[P:%.*]], align 2 +; CHECK-NEXT: ret void +; +entry: + store { , } zeroinitializer, ptr %p, align 2 + ret void +} + ; Positive test define void @memset_vscale_index_zero(ptr %p, i8 %z) {