From 56345c1cee4375eb5c28b8e7abf4803d20216b3b Mon Sep 17 00:00:00 2001 From: Haopeng Liu Date: Mon, 1 Jul 2024 23:58:28 +0000 Subject: [PATCH 01/11] Add the initializes attribute inference --- llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 334 ++++++++++++- .../TypeBasedAliasAnalysis/functionattrs.ll | 2 +- .../amdgpu-libcall-sincos-pass-ordering.ll | 2 +- .../AMDGPU/amdgpu-simplify-libcall-sincos.ll | 106 ++-- .../BPF/preserve-static-offset/store-zero.ll | 2 +- llvm/test/Other/optimize-inrange-gep.ll | 2 +- llvm/test/Transforms/Coroutines/coro-async.ll | 6 +- .../Transforms/FunctionAttrs/argmemonly.ll | 10 +- .../Transforms/FunctionAttrs/initializes.ll | 472 ++++++++++++++++++ .../Transforms/FunctionAttrs/nocapture.ll | 2 +- .../Transforms/FunctionAttrs/readattrs.ll | 4 +- .../Transforms/FunctionAttrs/writeonly.ll | 6 +- .../PGOProfile/memprof_internal_linkage.ll | 5 +- .../PhaseOrdering/X86/unroll-vectorizer.ll | 2 +- llvm/test/Transforms/PhaseOrdering/pr95152.ll | 6 +- 15 files changed, 881 insertions(+), 80 deletions(-) create mode 100644 llvm/test/Transforms/FunctionAttrs/initializes.ll diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 7b419d0f098b5..507dbf4ef26f0 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -15,6 +15,7 @@ #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -36,6 +37,7 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" +#include "llvm/IR/ConstantRangeList.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" @@ -580,6 +582,205 @@ struct ArgumentUsesTracker : public CaptureTracker { const SCCNodeSet &SCCNodes; }; +struct ArgumentUse { + Use *U; + std::optional Offset; +}; + +// A struct of argument access info. "Unknown" accesses are the cases like +// unrecognized instructions, instructions that have more than one use of +// the argument, or volatile memory accesses. "Unknown" implies "IsClobber" +// and an empty access range. +// Write or Read accesses can be clobbers as well for example, a Load with +// scalable type. +struct ArgumentAccessInfo { + enum AccessType { Write, Read, Unknown }; + AccessType ArgAccessType; + ConstantRangeList AccessRanges; + bool IsClobber = false; +}; + +struct UsesPerBlockInfo { + DenseMap Insts; + bool HasWrites; + bool HasClobber; +}; + +ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, + const ArgumentUse &IU, + const DataLayout &DL) { + auto GetTypeAccessRange = + [&DL](Type *Ty, + std::optional Offset) -> std::optional { + auto TypeSize = DL.getTypeStoreSize(Ty); + if (!TypeSize.isScalable() && Offset.has_value()) { + int64_t Size = TypeSize.getFixedValue(); + return ConstantRange(APInt(64, Offset.value(), true), + APInt(64, Offset.value() + Size, true)); + } + return std::nullopt; + }; + auto GetConstantIntRange = + [](Value *Length, + std::optional Offset) -> std::optional { + auto *ConstantLength = dyn_cast(Length); + if (ConstantLength && Offset.has_value()) { + return ConstantRange( + APInt(64, Offset.value(), true), + APInt(64, Offset.value() + ConstantLength->getSExtValue(), true)); + } + return std::nullopt; + }; + if (auto *SI = dyn_cast(I)) { + if (&SI->getOperandUse(1) == IU.U) { + // Get the fixed type size of "SI". Since the access range of a write + // will be unioned, if "SI" doesn't have a fixed type size, we just set + // the access range to empty. + ConstantRangeList AccessRanges; + auto TypeAccessRange = GetTypeAccessRange(SI->getAccessType(), IU.Offset); + if (TypeAccessRange.has_value()) + AccessRanges.insert(TypeAccessRange.value()); + return {ArgumentAccessInfo::AccessType::Write, AccessRanges, + /*IsClobber=*/false}; + } + } else if (auto *LI = dyn_cast(I)) { + if (&LI->getOperandUse(0) == IU.U) { + // Get the fixed type size of "LI". Different from Write, if "LI" + // doesn't have a fixed type size, we conservatively set as a clobber + // with an empty access range. + auto TypeAccessRange = GetTypeAccessRange(LI->getAccessType(), IU.Offset); + if (TypeAccessRange.has_value()) + return {ArgumentAccessInfo::AccessType::Read, + {TypeAccessRange.value()}, + /*IsClobber=*/false}; + else + return {ArgumentAccessInfo::AccessType::Read, {}, /*IsClobber=*/true}; + } + } else if (auto *MemSet = dyn_cast(I)) { + if (!MemSet->isVolatile()) { + ConstantRangeList AccessRanges; + auto AccessRange = GetConstantIntRange(MemSet->getLength(), IU.Offset); + if (AccessRange.has_value()) + AccessRanges.insert(AccessRange.value()); + return {ArgumentAccessInfo::AccessType::Write, AccessRanges, + /*IsClobber=*/false}; + } + } else if (auto *MemCpy = dyn_cast(I)) { + if (!MemCpy->isVolatile()) { + if (&MemCpy->getOperandUse(0) == IU.U) { + ConstantRangeList AccessRanges; + auto AccessRange = GetConstantIntRange(MemCpy->getLength(), IU.Offset); + if (AccessRange.has_value()) + AccessRanges.insert(AccessRange.value()); + return {ArgumentAccessInfo::AccessType::Write, AccessRanges, + /*IsClobber=*/false}; + } else if (&MemCpy->getOperandUse(1) == IU.U) { + auto AccessRange = GetConstantIntRange(MemCpy->getLength(), IU.Offset); + if (AccessRange.has_value()) + return {ArgumentAccessInfo::AccessType::Read, + {AccessRange.value()}, + /*IsClobber=*/false}; + else + return {ArgumentAccessInfo::AccessType::Read, {}, /*IsClobber=*/true}; + } + } + } else if (auto *CB = dyn_cast(I)) { + if (CB->isArgOperand(IU.U)) { + unsigned ArgNo = CB->getArgOperandNo(IU.U); + bool IsInitialize = CB->paramHasAttr(ArgNo, Attribute::Initializes); + // Argument is only not clobbered when parameter is writeonly/readnone + // and nocapture. + bool IsClobber = !(CB->onlyWritesMemory(ArgNo) && + CB->paramHasAttr(ArgNo, Attribute::NoCapture)); + ConstantRangeList AccessRanges; + if (IsInitialize && IU.Offset.has_value()) { + Attribute Attr = CB->getParamAttr(ArgNo, Attribute::Initializes); + if (!Attr.isValid()) { + Attr = CB->getCalledFunction()->getParamAttribute( + ArgNo, Attribute::Initializes); + } + ConstantRangeList CBCRL = Attr.getValueAsConstantRangeList(); + for (ConstantRange &CR : CBCRL) { + AccessRanges.insert(ConstantRange(CR.getLower() + IU.Offset.value(), + CR.getUpper() + IU.Offset.value())); + } + return {ArgumentAccessInfo::AccessType::Write, AccessRanges, IsClobber}; + } + } + } + // Unrecognized instructions are considered clobbers. + return {ArgumentAccessInfo::AccessType::Unknown, {}, /*IsClobber=*/true}; +} + +std::pair CollectArgumentUsesPerBlock( + Argument &A, Function &F, + DenseMap &UsesPerBlock) { + auto &DL = F.getParent()->getDataLayout(); + auto PointerSize = + DL.getIndexSizeInBits(A.getType()->getPointerAddressSpace()); + + bool HasAnyWrite = false; + bool HasWriteOutsideEntryBB = false; + + BasicBlock &EntryBB = F.getEntryBlock(); + SmallVector Worklist; + for (Use &U : A.uses()) + Worklist.push_back({&U, 0}); + + auto UpdateUseInfo = [&UsesPerBlock](Instruction *I, + ArgumentAccessInfo Info) { + auto *BB = I->getParent(); + auto &BBInfo = UsesPerBlock.getOrInsertDefault(BB); + bool AlreadyVisitedInst = BBInfo.Insts.contains(I); + auto &IInfo = BBInfo.Insts[I]; + + // Instructions that have more than one use of the argument are considered + // as clobbers. + if (AlreadyVisitedInst) { + IInfo = {ArgumentAccessInfo::AccessType::Unknown, {}, true}; + BBInfo.HasClobber = true; + return false; + } + + IInfo = Info; + BBInfo.HasClobber |= IInfo.IsClobber; + BBInfo.HasWrites |= + (IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Write && + !IInfo.AccessRanges.empty()); + return !IInfo.AccessRanges.empty(); + }; + + // No need for a visited set because we don't look through phis, so there are + // no cycles. + while (!Worklist.empty()) { + ArgumentUse IU = Worklist.pop_back_val(); + User *U = IU.U->getUser(); + // Add GEP uses to worklist. + // If the GEP is not a constant GEP, set IsInitialize to false. + if (auto *GEP = dyn_cast(U)) { + APInt Offset(PointerSize, 0, /*isSigned=*/true); + bool IsConstGEP = GEP->accumulateConstantOffset(DL, Offset); + std::optional NewOffset = std::nullopt; + if (IsConstGEP && IU.Offset.has_value()) { + NewOffset = *IU.Offset + Offset.getSExtValue(); + } + for (Use &U : GEP->uses()) + Worklist.push_back({&U, NewOffset}); + continue; + } + + auto *I = cast(U); + bool HasWrite = UpdateUseInfo(I, GetArgmentAccessInfo(I, IU, DL)); + + HasAnyWrite |= HasWrite; + + if (HasWrite && I->getParent() != &EntryBB) { + HasWriteOutsideEntryBB = true; + } + } + return {HasAnyWrite, HasWriteOutsideEntryBB}; +} + } // end anonymous namespace namespace llvm { @@ -866,9 +1067,132 @@ static bool addAccessAttr(Argument *A, Attribute::AttrKind R) { return true; } +static bool inferInitializes(Argument &A, Function &F) { + DenseMap UsesPerBlock; + auto [HasAnyWrite, HasWriteOutsideEntryBB] = + CollectArgumentUsesPerBlock(A, F, UsesPerBlock); + // No write anywhere in the function, bail. + if (!HasAnyWrite) + return false; + + BasicBlock &EntryBB = F.getEntryBlock(); + DenseMap Initialized; + auto VisitBlock = [&](const BasicBlock *BB) -> ConstantRangeList { + auto UPB = UsesPerBlock.find(BB); + + // If this block has uses and none are writes, the argument is not + // initialized in this block. + if (UPB != UsesPerBlock.end() && !UPB->second.HasWrites) + return ConstantRangeList(); + + ConstantRangeList CRL; + + // Start with intersection of successors. + // If this block has any clobbering use, we're going to clear out the + // ranges at some point in this block anyway, so don't bother looking at + // successors. + if (UPB == UsesPerBlock.end() || !UPB->second.HasClobber) { + bool HasAddedSuccessor = false; + for (auto *Succ : successors(BB)) { + if (auto SuccI = Initialized.find(Succ); SuccI != Initialized.end()) { + if (HasAddedSuccessor) { + CRL = CRL.intersectWith(SuccI->second); + } else { + CRL = SuccI->second; + HasAddedSuccessor = true; + } + } else { + CRL = ConstantRangeList(); + break; + } + } + } + + if (UPB != UsesPerBlock.end()) { + // Sort uses in this block by instruction order. + SmallVector, 2> Insts; + append_range(Insts, UPB->second.Insts); + sort(Insts, [](std::pair &LHS, + std::pair &RHS) { + return LHS.first->comesBefore(RHS.first); + }); + + // From the end of the block to the beginning of the block, set + // initializes ranges. + for (auto [_, Info] : reverse(Insts)) { + if (Info.IsClobber) { + CRL = ConstantRangeList(); + } + if (!Info.AccessRanges.empty()) { + if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Write) { + CRL = CRL.unionWith(Info.AccessRanges); + } else { + assert(Info.ArgAccessType == ArgumentAccessInfo::AccessType::Read); + for (const auto &ReadRange : Info.AccessRanges) { + CRL.subtract(ReadRange); + } + } + } + } + } + return CRL; + }; + + ConstantRangeList EntryCRL; + // If all write instructions are in the EntryBB, or if the EntryBB has + // a clobbering use, we only need to look at EntryBB. + bool OnlyScanEntryBlock = !HasWriteOutsideEntryBB; + if (!OnlyScanEntryBlock) { + if (auto EntryUPB = UsesPerBlock.find(&EntryBB); + EntryUPB != UsesPerBlock.end()) { + OnlyScanEntryBlock = EntryUPB->second.HasClobber; + } + } + if (OnlyScanEntryBlock) { + EntryCRL = VisitBlock(&EntryBB); + if (EntryCRL.empty()) { + return false; + } + } else { + // Visit successors before predecessors with a post-order walk of the + // blocks. + for (const BasicBlock *BB : post_order(&F)) { + ConstantRangeList CRL = VisitBlock(BB); + if (!CRL.empty()) { + Initialized[BB] = CRL; + } + } + + auto EntryCRLI = Initialized.find(&EntryBB); + if (EntryCRLI == Initialized.end()) { + return false; + } + + EntryCRL = EntryCRLI->second; + } + + assert(!EntryCRL.empty() && + "should have bailed already if EntryCRL is empty"); + + if (A.hasAttribute(Attribute::Initializes)) { + ConstantRangeList PreviousCRL = + A.getAttribute(Attribute::Initializes).getValueAsConstantRangeList(); + if (PreviousCRL == EntryCRL) { + return false; + } + EntryCRL = EntryCRL.unionWith(PreviousCRL); + } + + A.addAttr(Attribute::get(A.getContext(), Attribute::Initializes, + EntryCRL.rangesRef())); + + return true; +} + /// Deduce nocapture attributes for the SCC. static void addArgumentAttrs(const SCCNodeSet &SCCNodes, - SmallSet &Changed) { + SmallSet &Changed, + bool SkipInitializes) { ArgumentGraph AG; // Check each function in turn, determining which pointer arguments are not @@ -936,6 +1260,10 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes, if (addAccessAttr(&A, R)) Changed.insert(F); } + if (!SkipInitializes && !A.onlyReadsMemory()) { + if (inferInitializes(A, *F)) + Changed.insert(F); + } } } @@ -1844,13 +2172,13 @@ deriveAttrsInPostOrder(ArrayRef Functions, AARGetterT &&AARGetter, SmallSet Changed; if (ArgAttrsOnly) { - addArgumentAttrs(Nodes.SCCNodes, Changed); + addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/true); return Changed; } addArgumentReturnedAttrs(Nodes.SCCNodes, Changed); addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed); - addArgumentAttrs(Nodes.SCCNodes, Changed); + addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/false); inferConvergent(Nodes.SCCNodes, Changed); addNoReturnAttrs(Nodes.SCCNodes, Changed); addWillReturn(Nodes.SCCNodes, Changed); diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll index bea56a72bdeae..8615363a985d1 100644 --- a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll +++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll @@ -15,7 +15,7 @@ define void @test0_yes(ptr %p) nounwind { ret void } -; CHECK: define void @test0_no(ptr nocapture writeonly %p) #1 { +; CHECK: define void @test0_no(ptr nocapture writeonly initializes((0, 4)) %p) #1 { define void @test0_no(ptr %p) nounwind { store i32 0, ptr %p, !tbaa !2 ret void diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll index 6b835bb4eef66..317a069eed26e 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll @@ -10,7 +10,7 @@ ; Should have call to sincos declarations, not calls to the asm pseudo-libcalls define protected amdgpu_kernel void @swdev456865(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, float noundef %x) #0 { ; CHECK-LABEL: define protected amdgpu_kernel void @swdev456865( -; CHECK-SAME: ptr addrspace(1) nocapture writeonly [[OUT0:%.*]], ptr addrspace(1) nocapture writeonly [[OUT1:%.*]], ptr addrspace(1) nocapture writeonly [[OUT2:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-SAME: ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT0:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT1:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT2:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[I_I:%.*]] = call float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) #[[ATTR1:[0-9]+]] diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll index a35fbaadddf9e..619124affff81 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll @@ -49,7 +49,7 @@ declare float @_Z6sincosfPU3AS0f(float %x, ptr writeonly %ptr) #1 define void @sincos_f16_nocontract(half %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f16_nocontract -; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call half @_Z3sinDh(half [[X]]) ; CHECK-NEXT: store half [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 2 @@ -68,7 +68,7 @@ entry: define void @sincos_v2f16_nocontract(<2 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v2f16_nocontract -; CHECK-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call <2 x half> @_Z3sinDv2_Dh(<2 x half> [[X]]) ; CHECK-NEXT: store <2 x half> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 @@ -87,7 +87,7 @@ entry: define void @sincos_f16(half %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f16 -; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract half @_Z3sinDh(half [[X]]) ; CHECK-NEXT: store half [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 2 @@ -105,7 +105,7 @@ entry: define void @sincos_f16_order1(half %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f16_order1 -; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL1:%.*]] = tail call contract half @_Z3cosDh(half [[X]]) ; CHECK-NEXT: store half [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 2 @@ -123,7 +123,7 @@ entry: define void @sincos_v2f16(<2 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v2f16 -; CHECK-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract <2 x half> @_Z3sinDv2_Dh(<2 x half> [[X]]) ; CHECK-NEXT: store <2 x half> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 @@ -141,7 +141,7 @@ entry: define void @sincos_v3f16(<3 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v3f16 -; CHECK-SAME: (<3 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<3 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract <3 x half> @_Z3sinDv3_Dh(<3 x half> [[X]]) ; CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <3 x half> [[CALL]], <3 x half> poison, <4 x i32> @@ -164,7 +164,7 @@ entry: define void @sincos_v4f16(<4 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v4f16 -; CHECK-SAME: (<4 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<4 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract <4 x half> @_Z3sinDv4_Dh(<4 x half> [[X]]) ; CHECK-NEXT: store <4 x half> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8 @@ -182,7 +182,7 @@ entry: define void @sincos_v8f16(<8 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v8f16 -; CHECK-SAME: (<8 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<8 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 16)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 16)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract <8 x half> @_Z3sinDv8_Dh(<8 x half> [[X]]) ; CHECK-NEXT: store <8 x half> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 16 @@ -201,7 +201,7 @@ entry: define void @sincos_v16f16(<16 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v16f16 -; CHECK-SAME: (<16 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<16 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 32)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 32)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract <16 x half> @_Z3sinDv16_Dh(<16 x half> [[X]]) ; CHECK-NEXT: store <16 x half> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 32 @@ -220,7 +220,7 @@ entry: define void @sincos_f32_nocontract(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_nocontract -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -240,7 +240,7 @@ entry: define void @sincos_v2f32_nocontract(<2 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v2f32_nocontract -; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { +; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x float>, align 8, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call <2 x float> @_Z6sincosDv2_fPU3AS5S_(<2 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -260,7 +260,7 @@ entry: define void @sincos_f32(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -279,7 +279,7 @@ entry: define void @sincos_f32_order1(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_order1 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -298,7 +298,7 @@ entry: define void @sincos_v2f32(<2 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v2f32 -; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { +; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x float>, align 8, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract <2 x float> @_Z6sincosDv2_fPU3AS5S_(<2 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -317,7 +317,7 @@ entry: define void @sincos_v3f32(<3 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v3f32 -; CHECK-SAME: (<3 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { +; CHECK-SAME: (<3 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 16)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 16)) [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <3 x float>, align 16, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract <3 x float> @_Z6sincosDv3_fPU3AS5S_(<3 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -340,7 +340,7 @@ entry: define void @sincos_v4f32(<4 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v4f32 -; CHECK-SAME: (<4 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { +; CHECK-SAME: (<4 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 16)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 16)) [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <4 x float>, align 16, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract <4 x float> @_Z6sincosDv4_fPU3AS5S_(<4 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -359,7 +359,7 @@ entry: define void @sincos_v8f32(<8 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v8f32 -; CHECK-SAME: (<8 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { +; CHECK-SAME: (<8 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 32)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 32)) [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <8 x float>, align 32, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract <8 x float> @_Z6sincosDv8_fPU3AS5S_(<8 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -378,7 +378,7 @@ entry: define void @sincos_v16f32(<16 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v16f32 -; CHECK-SAME: (<16 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { +; CHECK-SAME: (<16 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 64)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 64)) [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <16 x float>, align 64, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract <16 x float> @_Z6sincosDv16_fPU3AS5S_(<16 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -397,7 +397,7 @@ entry: define void @sincos_f64_nocontract(double %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f64_nocontract -; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { +; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca double, align 8, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call double @_Z6sincosdPU3AS5d(double [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -417,7 +417,7 @@ entry: define void @sincos_v2f64_nocontract(<2 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v2f64_nocontract -; CHECK-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { +; CHECK-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 16)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 16)) [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x double>, align 16, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call <2 x double> @_Z6sincosDv2_dPU3AS5S_(<2 x double> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -436,7 +436,7 @@ entry: define void @sincos_f64(double %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f64 -; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { +; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca double, align 8, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract double @_Z6sincosdPU3AS5d(double [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -455,7 +455,7 @@ entry: define void @sincos_f64_order1(double %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f64_order1 -; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { +; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca double, align 8, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract double @_Z6sincosdPU3AS5d(double [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -474,7 +474,7 @@ entry: define void @sincos_v2f64(<2 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v2f64 -; CHECK-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { +; CHECK-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 16)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 16)) [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x double>, align 16, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract <2 x double> @_Z6sincosDv2_dPU3AS5S_(<2 x double> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -493,7 +493,7 @@ entry: define void @sincos_v3f64(<3 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v3f64 -; CHECK-SAME: (<3 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { +; CHECK-SAME: (<3 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 32)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 32)) [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <3 x double>, align 32, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract <3 x double> @_Z6sincosDv3_dPU3AS5S_(<3 x double> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -516,7 +516,7 @@ entry: define void @sincos_v4f64(<4 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v4f64 -; CHECK-SAME: (<4 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { +; CHECK-SAME: (<4 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 32)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 32)) [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <4 x double>, align 32, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract <4 x double> @_Z6sincosDv4_dPU3AS5S_(<4 x double> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -535,7 +535,7 @@ entry: define void @sincos_v8f64(<8 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v8f64 -; CHECK-SAME: (<8 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { +; CHECK-SAME: (<8 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 64)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 64)) [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <8 x double>, align 64, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract <8 x double> @_Z6sincosDv8_dPU3AS5S_(<8 x double> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -554,7 +554,7 @@ entry: define void @sincos_v16f64(<16 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v16f64 -; CHECK-SAME: (<16 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { +; CHECK-SAME: (<16 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 128)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 128)) [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <16 x double>, align 128, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract <16 x double> @_Z6sincosDv16_dPU3AS5S_(<16 x double> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -607,7 +607,7 @@ bb1: ; The sin and cos are in different blocks but always execute define void @sincos_f32_different_blocks_dominating_always_execute(i1 %cond, float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out, ptr addrspace(1) %other) { ; CHECK-LABEL: define void @sincos_f32_different_blocks_dominating_always_execute -; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[OTHER:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[OTHER:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -639,7 +639,7 @@ bb1: ; sin dominates cos but cos doesn't always execute. define void @sincos_f32_different_blocks_dominating_conditional_execute(i1 %cond, float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out, ptr addrspace(1) %other) { ; CHECK-LABEL: define void @sincos_f32_different_blocks_dominating_conditional_execute -; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]], ptr addrspace(1) nocapture readnone [[OTHER:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]], ptr addrspace(1) nocapture readnone [[OTHER:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -685,7 +685,7 @@ declare void @func(ptr addrspace(1)) define void @sincos_f32_value_is_instr(ptr addrspace(1) %value.ptr, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_value_is_instr -; CHECK-SAME: (ptr addrspace(1) [[VALUE_PTR:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { +; CHECK-SAME: (ptr addrspace(1) [[VALUE_PTR:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: tail call void @func(ptr addrspace(1) [[VALUE_PTR]]) @@ -708,7 +708,7 @@ entry: define void @sincos_f32_value_is_same_constexpr(ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_value_is_same_constexpr -; CHECK-SAME: (ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float bitcast (i32 ptrtoint (ptr @func to i32) to float), ptr addrspace(5) [[__SINCOS_]]) @@ -727,7 +727,7 @@ entry: define void @sincos_f32_value_is_different_constexpr(ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_value_is_different_constexpr -; CHECK-SAME: (ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) #[[ATTR2]] { +; CHECK-SAME: (ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float bitcast (i32 ptrtoint (ptr @func to i32) to float)) ; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 @@ -745,7 +745,7 @@ entry: define void @sincos_f32_value_is_same_constantfp(ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_value_is_same_constantfp -; CHECK-SAME: (ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float 4.200000e+01, ptr addrspace(5) [[__SINCOS_]]) @@ -764,7 +764,7 @@ entry: define void @sincos_f32_value_is_different_constantfp(ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_value_is_different_constantfp -; CHECK-SAME: (ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float 4.200000e+01) ; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 @@ -782,7 +782,7 @@ entry: define void @sincos_f32_different_args(float %x, float %y, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_different_args -; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]]) ; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 @@ -800,7 +800,7 @@ entry: define void @sincos_f32_flag_intersect0(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_flag_intersect0 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -819,7 +819,7 @@ entry: define void @sincos_f32_flag_intersect1(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_flag_intersect1 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call nnan contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -838,7 +838,7 @@ entry: define void @sincos_v2f32_flag_intersect1(<2 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v2f32_flag_intersect1 -; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { +; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x float>, align 8, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call nnan contract <2 x float> @_Z6sincosDv2_fPU3AS5S_(<2 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -914,7 +914,7 @@ entry: define void @sincos_f32_repeated_uses(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define void @sincos_f32_repeated_uses -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) [[SIN_OUT:%.*]], ptr addrspace(1) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -947,7 +947,7 @@ entry: define void @sin_f32_indirect_call_user(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out, ptr %func.ptr) { ; CHECK-LABEL: define void @sin_f32_indirect_call_user -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]], ptr nocapture readonly [[FUNC_PTR:%.*]]) local_unnamed_addr { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]], ptr nocapture readonly [[FUNC_PTR:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]]) ; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 @@ -965,7 +965,7 @@ entry: define void @cos_f32_indirect_call_user(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out, ptr %func.ptr) { ; CHECK-LABEL: define void @cos_f32_indirect_call_user -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]], ptr nocapture readonly [[FUNC_PTR:%.*]]) local_unnamed_addr { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]], ptr nocapture readonly [[FUNC_PTR:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3cosf(float [[X]]) ; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[COS_OUT]], align 4 @@ -983,7 +983,7 @@ entry: define void @sincos_f32_preserve_fpmath_0(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_preserve_fpmath_0 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]), !fpmath [[META5:![0-9]+]] @@ -1002,7 +1002,7 @@ entry: define void @sincos_f32_preserve_fpmath_1(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_preserve_fpmath_1 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]), !fpmath [[META6:![0-9]+]] @@ -1022,7 +1022,7 @@ entry: ; Should drop the metadata define void @sincos_f32_drop_fpmath(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_drop_fpmath -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -1041,7 +1041,7 @@ entry: define void @sincos_f32_debuginfo(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) !dbg !15 { ; CHECK-LABEL: define void @sincos_f32_debuginfo -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] !dbg [[DBG7:![0-9]+]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] !dbg [[DBG7:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5), !dbg [[DBG14:![0-9]+]] ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]), !dbg [[DBG14]] @@ -1064,7 +1064,7 @@ entry: define float @sin_sincos_private_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @sin_sincos_private_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[SIN0:%.*]] = tail call nnan ninf nsz contract float @_Z3sinf(float [[X]]), !fpmath [[META5]] @@ -1086,7 +1086,7 @@ entry: define float @sin_sincos_generic_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @sin_sincos_generic_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[SIN0:%.*]] = tail call nsz contract float @_Z3sinf(float [[X]]), !fpmath [[META5]] @@ -1110,7 +1110,7 @@ entry: define float @cos_sincos_private_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @cos_sincos_private_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[COS0:%.*]] = tail call contract float @_Z3cosf(float [[X]]) @@ -1132,7 +1132,7 @@ entry: define float @cos_sincos_generic_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @cos_sincos_generic_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[COS0:%.*]] = tail call contract float @_Z3cosf(float [[X]]) @@ -1156,7 +1156,7 @@ entry: define float @sincos_private_f32_x2(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @sincos_private_f32_x2 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP0:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[COS_TMP1:%.*]] = alloca float, align 4, addrspace(5) @@ -1184,7 +1184,7 @@ entry: define float @sincos_generic_f32_x2(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @sincos_generic_f32_x2 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP0:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[COS_TMP1:%.*]] = alloca float, align 4, addrspace(5) @@ -1213,7 +1213,7 @@ entry: define float @sincos_generic_private_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @sincos_generic_private_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP0:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[COS_TMP1:%.*]] = alloca float, align 4, addrspace(5) @@ -1240,7 +1240,7 @@ entry: define float @sincos_mixed_sin_cos_generic_private_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @sincos_mixed_sin_cos_generic_private_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP0:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) diff --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-zero.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-zero.ll index 7f2a06af8d10f..d3929a3706ba8 100644 --- a/llvm/test/CodeGen/BPF/preserve-static-offset/store-zero.ll +++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-zero.ll @@ -28,7 +28,7 @@ entry: ret void } -; CHECK: define dso_local void @bar(ptr nocapture noundef writeonly %[[p:.*]]) +; CHECK: define dso_local void @bar(ptr nocapture noundef writeonly initializes((0, 4)) %[[p:.*]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: store i32 0, ptr %[[p]], align 4, !tbaa ; CHECK-NEXT: ret void diff --git a/llvm/test/Other/optimize-inrange-gep.ll b/llvm/test/Other/optimize-inrange-gep.ll index e7465fddd80f0..66cf7f2c17f98 100644 --- a/llvm/test/Other/optimize-inrange-gep.ll +++ b/llvm/test/Other/optimize-inrange-gep.ll @@ -19,7 +19,7 @@ define void @foo(ptr %p) { ; O0-NEXT: ret void ; ; CHECK-LABEL: define void @foo( -; CHECK-SAME: ptr nocapture writeonly [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-SAME: ptr nocapture writeonly initializes((0, 8)) [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: store ptr getelementptr inbounds inrange(-24, 0) (i8, ptr @vtable, i64 24), ptr [[P]], align 8 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/Coroutines/coro-async.ll b/llvm/test/Transforms/Coroutines/coro-async.ll index 3740c3d1d8387..f02d0a242dc99 100644 --- a/llvm/test/Transforms/Coroutines/coro-async.ll +++ b/llvm/test/Transforms/Coroutines/coro-async.ll @@ -116,7 +116,7 @@ define void @my_async_function_pa(ptr %ctxt, ptr %task, ptr %actor) { ; CHECK: @my_async_function_pa_fp = constant <{ i32, i32 }> <{ {{.*}}, i32 176 } ; CHECK: @my_async_function2_fp = constant <{ i32, i32 }> <{ {{.*}}, i32 176 } -; CHECK-LABEL: define swiftcc void @my_async_function(ptr swiftasync %async.ctxt, ptr %task, ptr %actor) +; CHECK-LABEL: define swiftcc void @my_async_function(ptr swiftasync initializes((152, 160)) %async.ctxt, ptr %task, ptr %actor) ; CHECK-O0-LABEL: define swiftcc void @my_async_function(ptr swiftasync %async.ctxt, ptr %task, ptr %actor) ; CHECK-SAME: !dbg ![[SP1:[0-9]+]] { ; CHECK: coro.return: @@ -249,7 +249,7 @@ define swiftcc void @top_level_caller(ptr %ctxt, ptr %task, ptr %actor) { ret void } -; CHECK-LABEL: define swiftcc void @top_level_caller(ptr %ctxt, ptr %task, ptr %actor) +; CHECK-LABEL: define swiftcc void @top_level_caller(ptr initializes((152, 160)) %ctxt, ptr %task, ptr %actor) ; CHECK: store ptr @my_async_functionTQ0_ ; CHECK: store ptr %ctxt ; CHECK: tail call swiftcc void @asyncSuspend @@ -410,7 +410,7 @@ entry: unreachable } -; CHECK-LABEL: define swiftcc void @polymorphic_suspend_return(ptr swiftasync %async.ctxt, ptr %task, ptr %actor) +; CHECK-LABEL: define swiftcc void @polymorphic_suspend_return(ptr swiftasync initializes((152, 160)) %async.ctxt, ptr %task, ptr %actor) ; CHECK-LABEL: define internal swiftcc void @polymorphic_suspend_return.resume.0(ptr {{.*}}swiftasync{{.*}} %0, ptr {{.*}}swiftself{{.*}} %1, ptr {{.*}}%2, ptr {{.*}}%3) ; CHECK: } diff --git a/llvm/test/Transforms/FunctionAttrs/argmemonly.ll b/llvm/test/Transforms/FunctionAttrs/argmemonly.ll index ea6392714bf6f..5149624428f9d 100644 --- a/llvm/test/Transforms/FunctionAttrs/argmemonly.ll +++ b/llvm/test/Transforms/FunctionAttrs/argmemonly.ll @@ -101,7 +101,7 @@ entry: define void @test_only_write_arg(ptr %ptr) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; FNATTRS-LABEL: define void @test_only_write_arg -; FNATTRS-SAME: (ptr nocapture writeonly [[PTR:%.*]]) #[[ATTR4:[0-9]+]] { +; FNATTRS-SAME: (ptr nocapture writeonly initializes((0, 4)) [[PTR:%.*]]) #[[ATTR4:[0-9]+]] { ; FNATTRS-NEXT: entry: ; FNATTRS-NEXT: store i32 0, ptr [[PTR]], align 4 ; FNATTRS-NEXT: ret void @@ -156,7 +156,7 @@ declare i32 @fn_readnone() readnone define void @test_call_readnone(ptr %ptr) { ; FNATTRS: Function Attrs: memory(argmem: write) ; FNATTRS-LABEL: define void @test_call_readnone -; FNATTRS-SAME: (ptr nocapture writeonly [[PTR:%.*]]) #[[ATTR7:[0-9]+]] { +; FNATTRS-SAME: (ptr nocapture writeonly initializes((0, 4)) [[PTR:%.*]]) #[[ATTR7:[0-9]+]] { ; FNATTRS-NEXT: entry: ; FNATTRS-NEXT: [[C:%.*]] = call i32 @fn_readnone() ; FNATTRS-NEXT: store i32 [[C]], ptr [[PTR]], align 4 @@ -221,7 +221,7 @@ entry: define void @test_memcpy_argonly(ptr %dst, ptr %src) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; FNATTRS-LABEL: define void @test_memcpy_argonly -; FNATTRS-SAME: (ptr nocapture writeonly [[DST:%.*]], ptr nocapture readonly [[SRC:%.*]]) #[[ATTR9:[0-9]+]] { +; FNATTRS-SAME: (ptr nocapture writeonly initializes((0, 32)) [[DST:%.*]], ptr nocapture readonly [[SRC:%.*]]) #[[ATTR9:[0-9]+]] { ; FNATTRS-NEXT: entry: ; FNATTRS-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 32, i1 false) ; FNATTRS-NEXT: ret void @@ -245,7 +245,7 @@ declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) define void @test_memcpy_src_global(ptr %dst) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(readwrite, inaccessiblemem: none) ; FNATTRS-LABEL: define void @test_memcpy_src_global -; FNATTRS-SAME: (ptr nocapture writeonly [[DST:%.*]]) #[[ATTR11:[0-9]+]] { +; FNATTRS-SAME: (ptr nocapture writeonly initializes((0, 32)) [[DST:%.*]]) #[[ATTR11:[0-9]+]] { ; FNATTRS-NEXT: entry: ; FNATTRS-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr @arr, i64 32, i1 false) ; FNATTRS-NEXT: ret void @@ -370,7 +370,7 @@ define void @test_inaccessibleorargmemonly_readonly(ptr %arg) { define void @test_inaccessibleorargmemonly_readwrite(ptr %arg) { ; FNATTRS: Function Attrs: memory(argmem: write, inaccessiblemem: read) ; FNATTRS-LABEL: define void @test_inaccessibleorargmemonly_readwrite -; FNATTRS-SAME: (ptr nocapture writeonly [[ARG:%.*]]) #[[ATTR15:[0-9]+]] { +; FNATTRS-SAME: (ptr nocapture writeonly initializes((0, 4)) [[ARG:%.*]]) #[[ATTR15:[0-9]+]] { ; FNATTRS-NEXT: store i32 0, ptr [[ARG]], align 4 ; FNATTRS-NEXT: call void @fn_inaccessiblememonly() #[[ATTR19]] ; FNATTRS-NEXT: ret void diff --git a/llvm/test/Transforms/FunctionAttrs/initializes.ll b/llvm/test/Transforms/FunctionAttrs/initializes.ll new file mode 100644 index 0000000000000..d4bfbb9b34c4d --- /dev/null +++ b/llvm/test/Transforms/FunctionAttrs/initializes.ll @@ -0,0 +1,472 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --version 4 +; RUN: opt -passes=function-attrs -S < %s | FileCheck %s + +define void @basic(ptr %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @basic( +; CHECK-SAME: ptr nocapture writeonly initializes((0, 8)) [[P:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: store i64 123, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + store i64 123, ptr %p + ret void +} + +define void @stores_on_both_paths(ptr %p, i1 %i) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @stores_on_both_paths( +; CHECK-SAME: ptr nocapture writeonly initializes((0, 8)) [[P:%.*]], i1 [[I:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[I]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: store i64 123, ptr [[P]], align 4 +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: bb2: +; CHECK-NEXT: store i64 321, ptr [[P]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + br i1 %i, label %bb1, label %bb2 +bb1: + store i64 123, ptr %p + br label %end +bb2: + store i64 321, ptr %p + br label %end +end: + ret void +} + +define void @store_pointer_to_pointer(ptr %p, ptr %p2) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @store_pointer_to_pointer( +; CHECK-SAME: ptr [[P:%.*]], ptr nocapture writeonly initializes((0, 8)) [[P2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store ptr [[P]], ptr [[P2]], align 8 +; CHECK-NEXT: ret void +; + store ptr %p, ptr %p2 + ret void +} + +; TODO: this is still initializes +define void @store_pointer_to_itself(ptr %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @store_pointer_to_itself( +; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store ptr [[P]], ptr [[P]], align 8 +; CHECK-NEXT: ret void +; + store ptr %p, ptr %p + ret void +} + +define void @load_before_store(ptr %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @load_before_store( +; CHECK-SAME: ptr nocapture [[P:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: store i32 123, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + %a = load i32, ptr %p + store i32 123, ptr %p + ret void +} + +define void @partial_load_before_store(ptr %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @partial_load_before_store( +; CHECK-SAME: ptr nocapture initializes((4, 8)) [[P:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: store i64 123, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + %a = load i32, ptr %p + store i64 123, ptr %p + ret void +} + +declare void @use(ptr) + +define void @call_clobber(ptr %p) { +; CHECK-LABEL: define void @call_clobber( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: call void @use(ptr [[P]]) +; CHECK-NEXT: store i64 123, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + call void @use(ptr %p) + store i64 123, ptr %p + ret void +} + +define void @call_clobber_after_store(ptr %p) { +; CHECK-LABEL: define void @call_clobber_after_store( +; CHECK-SAME: ptr initializes((0, 8)) [[P:%.*]]) { +; CHECK-NEXT: store i64 123, ptr [[P]], align 4 +; CHECK-NEXT: call void @use(ptr [[P]]) +; CHECK-NEXT: ret void +; + store i64 123, ptr %p + call void @use(ptr %p) + ret void +} + +define void @store_offset(ptr %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @store_offset( +; CHECK-SAME: ptr nocapture writeonly initializes((8, 12)) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 8 +; CHECK-NEXT: store i32 123, ptr [[G]], align 4 +; CHECK-NEXT: ret void +; + %g = getelementptr i8, ptr %p, i64 8 + store i32 123, ptr %g + ret void +} + +define void @store_volatile(ptr %p) { +; CHECK: Function Attrs: nofree norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) +; CHECK-LABEL: define void @store_volatile( +; CHECK-SAME: ptr initializes((8, 12)) [[P:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 8 +; CHECK-NEXT: store volatile i32 123, ptr [[G]], align 4 +; CHECK-NEXT: ret void +; + %g = getelementptr i8, ptr %p, i64 8 + store volatile i32 123, ptr %g + ret void +} + +define void @merge_store_ranges(ptr %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @merge_store_ranges( +; CHECK-SAME: ptr nocapture writeonly initializes((0, 8)) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 4 +; CHECK-NEXT: store i32 123, ptr [[G]], align 4 +; CHECK-NEXT: store i32 123, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + %g = getelementptr i8, ptr %p, i64 4 + store i32 123, ptr %g + store i32 123, ptr %p + ret void +} + +define void @partially_overlapping_stores_branches(ptr %p, i1 %i) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @partially_overlapping_stores_branches( +; CHECK-SAME: ptr nocapture writeonly initializes((4, 8)) [[P:%.*]], i1 [[I:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 4 +; CHECK-NEXT: br i1 [[I]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: store i64 123, ptr [[G]], align 4 +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: bb2: +; CHECK-NEXT: store i64 321, ptr [[P]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %g = getelementptr i8, ptr %p, i64 4 + br i1 %i, label %bb1, label %bb2 +bb1: + store i64 123, ptr %g + br label %end +bb2: + store i64 321, ptr %p + br label %end +end: + ret void +} + +define void @non_overlapping_stores_branches(ptr %p, i1 %i) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @non_overlapping_stores_branches( +; CHECK-SAME: ptr nocapture writeonly [[P:%.*]], i1 [[I:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 8 +; CHECK-NEXT: br i1 [[I]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: store i64 123, ptr [[G]], align 4 +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: bb2: +; CHECK-NEXT: store i64 321, ptr [[P]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %g = getelementptr i8, ptr %p, i64 8 + br i1 %i, label %bb1, label %bb2 +bb1: + store i64 123, ptr %g + br label %end +bb2: + store i64 321, ptr %p + br label %end +end: + ret void +} + +define void @dominating_store(ptr %p, i1 %i) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @dominating_store( +; CHECK-SAME: ptr nocapture writeonly initializes((0, 8)) [[P:%.*]], i1 [[I:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[I]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: store i64 321, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; +entry: + br i1 %i, label %bb1, label %bb2 +bb1: + br label %end +bb2: + br label %end +end: + store i64 321, ptr %p + ret void +} + +define void @call_clobber_on_one_branch(ptr %p, i1 %i) { +; CHECK-LABEL: define void @call_clobber_on_one_branch( +; CHECK-SAME: ptr [[P:%.*]], i1 [[I:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[I]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @use(ptr [[P]]) +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: store i64 321, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; +entry: + br i1 %i, label %bb1, label %bb2 +bb1: + br label %end +bb2: + call void @use(ptr %p) + br label %end +end: + store i64 321, ptr %p + ret void +} + +define void @merge_existing_initializes(ptr initializes((33, 36)) %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @merge_existing_initializes( +; CHECK-SAME: ptr nocapture writeonly initializes((0, 8), (33, 36)) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i64 123, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + store i64 123, ptr %p + ret void +} + +define void @negative_offset(ptr %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @negative_offset( +; CHECK-SAME: ptr nocapture writeonly initializes((-5, 3)) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 -5 +; CHECK-NEXT: store i64 123, ptr [[G]], align 4 +; CHECK-NEXT: ret void +; + %g = getelementptr i8, ptr %p, i64 -5 + store i64 123, ptr %g + ret void +} + +define void @call_clobber_in_entry_block(ptr %p, i1 %i) { +; CHECK-LABEL: define void @call_clobber_in_entry_block( +; CHECK-SAME: ptr [[P:%.*]], i1 [[I:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @use(ptr [[P]]) +; CHECK-NEXT: br i1 [[I]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: store i64 123, ptr [[P]], align 4 +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: bb2: +; CHECK-NEXT: store i64 321, ptr [[P]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + call void @use(ptr %p) + br i1 %i, label %bb1, label %bb2 +bb1: + store i64 123, ptr %p + br label %end +bb2: + store i64 321, ptr %p + br label %end +end: + ret void +} + +declare void @g1(ptr initializes((0, 4)) %p) +declare void @g2(ptr initializes((8, 12)) %p) +declare void @g3(ptr initializes((0, 4)) writeonly nocapture %p) +declare void @g4(ptr initializes((0, 4)) readnone nocapture %p) + +define void @call_initializes(ptr %p) { +; CHECK-LABEL: define void @call_initializes( +; CHECK-SAME: ptr initializes((0, 4)) [[P:%.*]]) { +; CHECK-NEXT: call void @g1(ptr [[P]]) +; CHECK-NEXT: ret void +; + call void @g1(ptr %p) + ret void +} + +define void @call_initializes_clobber(ptr %p) { +; CHECK-LABEL: define void @call_initializes_clobber( +; CHECK-SAME: ptr initializes((0, 4)) [[P:%.*]]) { +; CHECK-NEXT: call void @g1(ptr [[P]]) +; CHECK-NEXT: call void @g2(ptr [[P]]) +; CHECK-NEXT: ret void +; + call void @g1(ptr %p) + call void @g2(ptr %p) + ret void +} + +define void @call_initializes_no_clobber_writeonly_capture(ptr %p) { +; CHECK-LABEL: define void @call_initializes_no_clobber_writeonly_capture( +; CHECK-SAME: ptr initializes((0, 4), (8, 12)) [[P:%.*]]) { +; CHECK-NEXT: call void @g3(ptr [[P]]) +; CHECK-NEXT: call void @g2(ptr [[P]]) +; CHECK-NEXT: ret void +; + call void @g3(ptr %p) + call void @g2(ptr %p) + ret void +} + +define void @call_initializes_no_clobber_readnone_capture(ptr %p) { +; CHECK-LABEL: define void @call_initializes_no_clobber_readnone_capture( +; CHECK-SAME: ptr initializes((0, 4), (8, 12)) [[P:%.*]]) { +; CHECK-NEXT: call void @g4(ptr [[P]]) +; CHECK-NEXT: call void @g2(ptr [[P]]) +; CHECK-NEXT: ret void +; + call void @g4(ptr %p) + call void @g2(ptr %p) + ret void +} + +declare void @llvm.memset(ptr, i8, i64 ,i1) + +define void @memset(ptr %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @memset( +; CHECK-SAME: ptr nocapture writeonly initializes((0, 9)) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[P]], i8 2, i64 9, i1 false) +; CHECK-NEXT: ret void +; + call void @llvm.memset(ptr %p, i8 2, i64 9, i1 false) + ret void +} + +define void @memset_offset(ptr %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @memset_offset( +; CHECK-SAME: ptr nocapture writeonly initializes((3, 12)) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[G]], i8 2, i64 9, i1 false) +; CHECK-NEXT: ret void +; + %g = getelementptr i8, ptr %p, i64 3 + call void @llvm.memset(ptr %g, i8 2, i64 9, i1 false) + ret void +} + +define void @memset_volatile(ptr %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @memset_volatile( +; CHECK-SAME: ptr writeonly [[P:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[P]], i8 2, i64 9, i1 true) +; CHECK-NEXT: ret void +; + call void @llvm.memset(ptr %p, i8 2, i64 9, i1 true) + ret void +} + +declare void @llvm.memcpy(ptr, ptr, i64 ,i1) + +define void @memcpy(ptr %p, ptr %p2) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @memcpy( +; CHECK-SAME: ptr nocapture writeonly initializes((0, 9)) [[P:%.*]], ptr nocapture readonly [[P2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[P]], ptr [[P2]], i64 9, i1 false) +; CHECK-NEXT: ret void +; + call void @llvm.memcpy(ptr %p, ptr %p2, i64 9, i1 false) + ret void +} + +define void @memcpy_volatile(ptr %p, ptr %p2) { +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @memcpy_volatile( +; CHECK-SAME: ptr writeonly [[P:%.*]], ptr readonly [[P2:%.*]]) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[P]], ptr [[P2]], i64 9, i1 true) +; CHECK-NEXT: ret void +; + call void @llvm.memcpy(ptr %p, ptr %p2, i64 9, i1 true) + ret void +} + +define void @memcpy_offset(ptr %p, ptr %p2) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @memcpy_offset( +; CHECK-SAME: ptr nocapture writeonly initializes((3, 12)) [[P:%.*]], ptr nocapture readonly [[P2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[G]], ptr [[P2]], i64 9, i1 false) +; CHECK-NEXT: ret void +; + %g = getelementptr i8, ptr %p, i64 3 + call void @llvm.memcpy(ptr %g, ptr %p2, i64 9, i1 false) + ret void +} + +define void @memcpy_src(ptr %p, ptr %p2) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @memcpy_src( +; CHECK-SAME: ptr nocapture initializes((96, 128)) [[P:%.*]], ptr nocapture initializes((0, 96)) [[P2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[P2]], ptr [[P]], i64 96, i1 false) +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 64 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[G]], ptr [[P2]], i64 64, i1 false) +; CHECK-NEXT: ret void +; + call void @llvm.memcpy(ptr %p2, ptr %p, i64 96, i1 false) + %g = getelementptr i8, ptr %p, i64 64 + call void @llvm.memcpy(ptr %g, ptr %p2, i64 64, i1 false) + ret void +} + +define void @non_const_gep(ptr %p, i64 %i) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @non_const_gep( +; CHECK-SAME: ptr nocapture writeonly initializes((0, 8)) [[P:%.*]], i64 [[I:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]] +; CHECK-NEXT: store i64 123, ptr [[G]], align 4 +; CHECK-NEXT: store i64 123, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + %g = getelementptr i8, ptr %p, i64 %i + store i64 123, ptr %g + store i64 123, ptr %p + ret void +} diff --git a/llvm/test/Transforms/FunctionAttrs/nocapture.ll b/llvm/test/Transforms/FunctionAttrs/nocapture.ll index 7df6132ac6a31..467f1e7c692fa 100644 --- a/llvm/test/Transforms/FunctionAttrs/nocapture.ll +++ b/llvm/test/Transforms/FunctionAttrs/nocapture.ll @@ -629,7 +629,7 @@ define void @test_atomicrmw(ptr %p) { define void @test_volatile(ptr %x) { ; FNATTRS: Function Attrs: nofree norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) ; FNATTRS-LABEL: define void @test_volatile -; FNATTRS-SAME: (ptr [[X:%.*]]) #[[ATTR12:[0-9]+]] { +; FNATTRS-SAME: (ptr initializes((4, 8)) [[X:%.*]]) #[[ATTR12:[0-9]+]] { ; FNATTRS-NEXT: entry: ; FNATTRS-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[X]], i64 1 ; FNATTRS-NEXT: store volatile i32 0, ptr [[GEP]], align 4 diff --git a/llvm/test/Transforms/FunctionAttrs/readattrs.ll b/llvm/test/Transforms/FunctionAttrs/readattrs.ll index 39513976f90d7..004c0485d764a 100644 --- a/llvm/test/Transforms/FunctionAttrs/readattrs.ll +++ b/llvm/test/Transforms/FunctionAttrs/readattrs.ll @@ -107,7 +107,7 @@ define void @test4_2(ptr %p) { define void @test5(ptr %p, ptr %q) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; FNATTRS-LABEL: define {{[^@]+}}@test5 -; FNATTRS-SAME: (ptr nocapture writeonly [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR4:[0-9]+]] { +; FNATTRS-SAME: (ptr nocapture writeonly initializes((0, 8)) [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR4:[0-9]+]] { ; FNATTRS-NEXT: store ptr [[Q]], ptr [[P]], align 8 ; FNATTRS-NEXT: ret void ; @@ -132,7 +132,7 @@ declare void @test6_1() ; This is not a missed optz'n. define void @test6_2(ptr %p, ptr %q) { ; FNATTRS-LABEL: define {{[^@]+}}@test6_2 -; FNATTRS-SAME: (ptr nocapture writeonly [[P:%.*]], ptr [[Q:%.*]]) { +; FNATTRS-SAME: (ptr nocapture writeonly initializes((0, 8)) [[P:%.*]], ptr [[Q:%.*]]) { ; FNATTRS-NEXT: store ptr [[Q]], ptr [[P]], align 8 ; FNATTRS-NEXT: call void @test6_1() ; FNATTRS-NEXT: ret void diff --git a/llvm/test/Transforms/FunctionAttrs/writeonly.ll b/llvm/test/Transforms/FunctionAttrs/writeonly.ll index de2d5e2238947..a1f4b0b6e5b1d 100644 --- a/llvm/test/Transforms/FunctionAttrs/writeonly.ll +++ b/llvm/test/Transforms/FunctionAttrs/writeonly.ll @@ -66,7 +66,7 @@ nouses-argworn-funwo_entry: define void @test_store(ptr %p) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; FNATTRS-LABEL: define {{[^@]+}}@test_store -; FNATTRS-SAME: (ptr nocapture writeonly [[P:%.*]]) #[[ATTR3:[0-9]+]] { +; FNATTRS-SAME: (ptr nocapture writeonly initializes((0, 1)) [[P:%.*]]) #[[ATTR3:[0-9]+]] { ; FNATTRS-NEXT: store i8 0, ptr [[P]], align 1 ; FNATTRS-NEXT: ret void ; @@ -107,7 +107,7 @@ define i8 @test_store_capture(ptr %p) { define void @test_addressing(ptr %p) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; FNATTRS-LABEL: define {{[^@]+}}@test_addressing -; FNATTRS-SAME: (ptr nocapture writeonly [[P:%.*]]) #[[ATTR3]] { +; FNATTRS-SAME: (ptr nocapture writeonly initializes((8, 12)) [[P:%.*]]) #[[ATTR3]] { ; FNATTRS-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[P]], i64 8 ; FNATTRS-NEXT: store i32 0, ptr [[GEP]], align 4 ; FNATTRS-NEXT: ret void @@ -147,7 +147,7 @@ define void @test_readwrite(ptr %p) { define void @test_volatile(ptr %p) { ; FNATTRS: Function Attrs: nofree norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) ; FNATTRS-LABEL: define {{[^@]+}}@test_volatile -; FNATTRS-SAME: (ptr [[P:%.*]]) #[[ATTR6:[0-9]+]] { +; FNATTRS-SAME: (ptr initializes((0, 1)) [[P:%.*]]) #[[ATTR6:[0-9]+]] { ; FNATTRS-NEXT: store volatile i8 0, ptr [[P]], align 1 ; FNATTRS-NEXT: ret void ; diff --git a/llvm/test/Transforms/PGOProfile/memprof_internal_linkage.ll b/llvm/test/Transforms/PGOProfile/memprof_internal_linkage.ll index 3c4138fc4ca49..6122eef8a20d9 100644 --- a/llvm/test/Transforms/PGOProfile/memprof_internal_linkage.ll +++ b/llvm/test/Transforms/PGOProfile/memprof_internal_linkage.ll @@ -12,7 +12,8 @@ ; RUN: opt < %s -passes='memprof-use' -S | FileCheck %s ; CHECK: call {{.*}} @_Znam{{.*}} #[[ATTR:[0-9]+]] -; CHECK: attributes #[[ATTR]] = { builtin allocsize(0) "memprof"="notcold" } +; old: attributes #[[ATTR]] = { builtin allocsize(0) "memprof"="notcold" } +; CHECK: attributes #[[ATTR]] = { builtin allocsize(0) } ; ModuleID = 'memprof_internal_linkage.cc' source_filename = "memprof_internal_linkage.cc" @@ -81,4 +82,4 @@ attributes #5 = { builtin allocsize(0) } !18 = !DILocation(line: 4, column: 8, scope: !16) !19 = !DILocation(line: 5, column: 10, scope: !16) !20 = !DILocation(line: 5, column: 3, scope: !16) -!21 = !DILocation(line: 6, column: 1, scope: !16) \ No newline at end of file +!21 = !DILocation(line: 6, column: 1, scope: !16) diff --git a/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll b/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll index 1c9e7a771ca19..ec0c2b40640f4 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll @@ -8,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu" define void @foo(ptr %a, <32 x i8> %_0) #0 { ; CHECK-LABEL: define void @foo( -; CHECK-SAME: ptr nocapture writeonly [[A:%.*]], <32 x i8> [[_0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-SAME: ptr nocapture writeonly initializes((0, 32)) [[A:%.*]], <32 x i8> [[_0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: start: ; CHECK-NEXT: store <32 x i8> [[_0]], ptr [[A]], align 1 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/PhaseOrdering/pr95152.ll b/llvm/test/Transforms/PhaseOrdering/pr95152.ll index 16610c439f4c0..bbe60de0486a7 100644 --- a/llvm/test/Transforms/PhaseOrdering/pr95152.ll +++ b/llvm/test/Transforms/PhaseOrdering/pr95152.ll @@ -21,7 +21,7 @@ define void @j(ptr %p) optnone noinline { define void @h(ptr %p) { ; CHECK-LABEL: define void @h( -; CHECK-SAME: ptr [[P:%.*]]) local_unnamed_addr { +; CHECK-SAME: ptr initializes((0, 8)) [[P:%.*]]) local_unnamed_addr { ; CHECK-NEXT: store i64 3, ptr [[P]], align 4 ; CHECK-NEXT: tail call void @j(ptr nonnull [[P]]) ; CHECK-NEXT: ret void @@ -33,7 +33,7 @@ define void @h(ptr %p) { define void @g(ptr dead_on_unwind noalias writable dereferenceable(8) align 8 %p) minsize { ; CHECK-LABEL: define void @g( -; CHECK-SAME: ptr dead_on_unwind noalias nocapture writable writeonly align 8 dereferenceable(8) [[P:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +; CHECK-SAME: ptr dead_on_unwind noalias nocapture writable writeonly align 8 dereferenceable(8) initializes((0, 8)) [[P:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: tail call void @h(ptr nonnull [[P]]) ; CHECK-NEXT: ret void ; @@ -45,7 +45,7 @@ define void @g(ptr dead_on_unwind noalias writable dereferenceable(8) align 8 %p define void @f(ptr dead_on_unwind noalias %p) { ; CHECK-LABEL: define void @f( -; CHECK-SAME: ptr dead_on_unwind noalias [[P:%.*]]) local_unnamed_addr { +; CHECK-SAME: ptr dead_on_unwind noalias initializes((0, 8)) [[P:%.*]]) local_unnamed_addr { ; CHECK-NEXT: store i64 3, ptr [[P]], align 4 ; CHECK-NEXT: tail call void @j(ptr nonnull [[P]]) ; CHECK-NEXT: store i64 43, ptr [[P]], align 4 From d4d49d3aba60872dcad36c3866a95d7c6892cccb Mon Sep 17 00:00:00 2001 From: Haopeng Liu Date: Mon, 8 Jul 2024 22:27:53 +0000 Subject: [PATCH 02/11] Update the inference code, comments, and tests --- llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 193 ++++++++++-------- .../Transforms/FunctionAttrs/initializes.ll | 109 ++++++++-- 2 files changed, 203 insertions(+), 99 deletions(-) diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 507dbf4ef26f0..76e10a88a9444 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -582,6 +582,8 @@ struct ArgumentUsesTracker : public CaptureTracker { const SCCNodeSet &SCCNodes; }; +// A struct of argument use: a Use and the offset it accesses. This struct +// is to track uses inside function via GEP. struct ArgumentUse { Use *U; std::optional Offset; @@ -594,29 +596,29 @@ struct ArgumentUse { // Write or Read accesses can be clobbers as well for example, a Load with // scalable type. struct ArgumentAccessInfo { - enum AccessType { Write, Read, Unknown }; + enum class AccessType : uint8_t { Write, Read, Unknown }; AccessType ArgAccessType; - ConstantRangeList AccessRanges; bool IsClobber = false; + ConstantRangeList AccessRanges; }; struct UsesPerBlockInfo { DenseMap Insts; - bool HasWrites; - bool HasClobber; + bool HasWrites = false; + bool HasClobber = false; }; ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, - const ArgumentUse &IU, + const ArgumentUse &ArgUse, const DataLayout &DL) { auto GetTypeAccessRange = [&DL](Type *Ty, std::optional Offset) -> std::optional { auto TypeSize = DL.getTypeStoreSize(Ty); - if (!TypeSize.isScalable() && Offset.has_value()) { + if (!TypeSize.isScalable() && Offset) { int64_t Size = TypeSize.getFixedValue(); - return ConstantRange(APInt(64, Offset.value(), true), - APInt(64, Offset.value() + Size, true)); + return ConstantRange(APInt(64, *Offset, true), + APInt(64, *Offset + Size, true)); } return std::nullopt; }; @@ -624,97 +626,101 @@ ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, [](Value *Length, std::optional Offset) -> std::optional { auto *ConstantLength = dyn_cast(Length); - if (ConstantLength && Offset.has_value()) { + if (ConstantLength && Offset) return ConstantRange( - APInt(64, Offset.value(), true), - APInt(64, Offset.value() + ConstantLength->getSExtValue(), true)); - } + APInt(64, *Offset, true), + APInt(64, *Offset + ConstantLength->getSExtValue(), true)); return std::nullopt; }; if (auto *SI = dyn_cast(I)) { - if (&SI->getOperandUse(1) == IU.U) { + if (&SI->getOperandUse(1) == ArgUse.U) { // Get the fixed type size of "SI". Since the access range of a write // will be unioned, if "SI" doesn't have a fixed type size, we just set // the access range to empty. ConstantRangeList AccessRanges; - auto TypeAccessRange = GetTypeAccessRange(SI->getAccessType(), IU.Offset); - if (TypeAccessRange.has_value()) - AccessRanges.insert(TypeAccessRange.value()); - return {ArgumentAccessInfo::AccessType::Write, AccessRanges, - /*IsClobber=*/false}; + if (auto TypeAccessRange = + GetTypeAccessRange(SI->getAccessType(), ArgUse.Offset)) + AccessRanges.insert(*TypeAccessRange); + return {ArgumentAccessInfo::AccessType::Write, + /*IsClobber=*/false, AccessRanges}; } } else if (auto *LI = dyn_cast(I)) { - if (&LI->getOperandUse(0) == IU.U) { + if (&LI->getOperandUse(0) == ArgUse.U) { // Get the fixed type size of "LI". Different from Write, if "LI" // doesn't have a fixed type size, we conservatively set as a clobber // with an empty access range. - auto TypeAccessRange = GetTypeAccessRange(LI->getAccessType(), IU.Offset); - if (TypeAccessRange.has_value()) + if (auto TypeAccessRange = + GetTypeAccessRange(LI->getAccessType(), ArgUse.Offset)) return {ArgumentAccessInfo::AccessType::Read, - {TypeAccessRange.value()}, - /*IsClobber=*/false}; + /*IsClobber=*/false, + {*TypeAccessRange}}; else - return {ArgumentAccessInfo::AccessType::Read, {}, /*IsClobber=*/true}; + return {ArgumentAccessInfo::AccessType::Read, /*IsClobber=*/true, {}}; } } else if (auto *MemSet = dyn_cast(I)) { if (!MemSet->isVolatile()) { ConstantRangeList AccessRanges; - auto AccessRange = GetConstantIntRange(MemSet->getLength(), IU.Offset); - if (AccessRange.has_value()) - AccessRanges.insert(AccessRange.value()); - return {ArgumentAccessInfo::AccessType::Write, AccessRanges, - /*IsClobber=*/false}; + if (auto AccessRange = + GetConstantIntRange(MemSet->getLength(), ArgUse.Offset)) + AccessRanges.insert(*AccessRange); + return {ArgumentAccessInfo::AccessType::Write, + /*IsClobber=*/false, AccessRanges}; } - } else if (auto *MemCpy = dyn_cast(I)) { - if (!MemCpy->isVolatile()) { - if (&MemCpy->getOperandUse(0) == IU.U) { + } else if (auto *MTI = dyn_cast(I)) { + if (!MTI->isVolatile()) { + if (&MTI->getOperandUse(0) == ArgUse.U) { ConstantRangeList AccessRanges; - auto AccessRange = GetConstantIntRange(MemCpy->getLength(), IU.Offset); - if (AccessRange.has_value()) - AccessRanges.insert(AccessRange.value()); - return {ArgumentAccessInfo::AccessType::Write, AccessRanges, - /*IsClobber=*/false}; - } else if (&MemCpy->getOperandUse(1) == IU.U) { - auto AccessRange = GetConstantIntRange(MemCpy->getLength(), IU.Offset); - if (AccessRange.has_value()) + if (auto AccessRange = + GetConstantIntRange(MTI->getLength(), ArgUse.Offset)) + AccessRanges.insert(*AccessRange); + return {ArgumentAccessInfo::AccessType::Write, + /*IsClobber=*/false, AccessRanges}; + } else if (&MTI->getOperandUse(1) == ArgUse.U) { + if (auto AccessRange = + GetConstantIntRange(MTI->getLength(), ArgUse.Offset)) return {ArgumentAccessInfo::AccessType::Read, - {AccessRange.value()}, - /*IsClobber=*/false}; + /*IsClobber=*/false, + {*AccessRange}}; else - return {ArgumentAccessInfo::AccessType::Read, {}, /*IsClobber=*/true}; + return {ArgumentAccessInfo::AccessType::Read, /*IsClobber=*/true, {}}; + } else { + return { + ArgumentAccessInfo::AccessType::Unknown, /*IsClobber=*/true, {}}; } } } else if (auto *CB = dyn_cast(I)) { - if (CB->isArgOperand(IU.U)) { - unsigned ArgNo = CB->getArgOperandNo(IU.U); + if (CB->isArgOperand(ArgUse.U)) { + unsigned ArgNo = CB->getArgOperandNo(ArgUse.U); bool IsInitialize = CB->paramHasAttr(ArgNo, Attribute::Initializes); // Argument is only not clobbered when parameter is writeonly/readnone // and nocapture. bool IsClobber = !(CB->onlyWritesMemory(ArgNo) && CB->paramHasAttr(ArgNo, Attribute::NoCapture)); ConstantRangeList AccessRanges; - if (IsInitialize && IU.Offset.has_value()) { + if (IsInitialize && ArgUse.Offset) { Attribute Attr = CB->getParamAttr(ArgNo, Attribute::Initializes); - if (!Attr.isValid()) { + if (!Attr.isValid()) Attr = CB->getCalledFunction()->getParamAttribute( ArgNo, Attribute::Initializes); - } ConstantRangeList CBCRL = Attr.getValueAsConstantRangeList(); - for (ConstantRange &CR : CBCRL) { - AccessRanges.insert(ConstantRange(CR.getLower() + IU.Offset.value(), - CR.getUpper() + IU.Offset.value())); - } - return {ArgumentAccessInfo::AccessType::Write, AccessRanges, IsClobber}; + for (ConstantRange &CR : CBCRL) + AccessRanges.insert(ConstantRange(CR.getLower() + *ArgUse.Offset, + CR.getUpper() + *ArgUse.Offset)); + return {ArgumentAccessInfo::AccessType::Write, IsClobber, AccessRanges}; } } } // Unrecognized instructions are considered clobbers. - return {ArgumentAccessInfo::AccessType::Unknown, {}, /*IsClobber=*/true}; + return {ArgumentAccessInfo::AccessType::Unknown, /*IsClobber=*/true, {}}; } +// Collect the uses of argument "A" in "F" and store the uses info per block to +// "UsesPerBlock". Return a pair of bool that indicate whether there is any +// write access, and whether there is any write access outside of the entry +// block in "F", which will be used to simplify the inference for simple cases. std::pair CollectArgumentUsesPerBlock( Argument &A, Function &F, - DenseMap &UsesPerBlock) { + SmallDenseMap &UsesPerBlock) { auto &DL = F.getParent()->getDataLayout(); auto PointerSize = DL.getIndexSizeInBits(A.getType()->getPointerAddressSpace()); @@ -727,6 +733,8 @@ std::pair CollectArgumentUsesPerBlock( for (Use &U : A.uses()) Worklist.push_back({&U, 0}); + // Update "UsesPerBlock" with the block of "I" as key and "Info" as value. + // Return true if the block of "I" has write accesses after updating. auto UpdateUseInfo = [&UsesPerBlock](Instruction *I, ArgumentAccessInfo Info) { auto *BB = I->getParent(); @@ -737,46 +745,45 @@ std::pair CollectArgumentUsesPerBlock( // Instructions that have more than one use of the argument are considered // as clobbers. if (AlreadyVisitedInst) { - IInfo = {ArgumentAccessInfo::AccessType::Unknown, {}, true}; + IInfo = {ArgumentAccessInfo::AccessType::Unknown, /*IsClobber=*/true, {}}; BBInfo.HasClobber = true; return false; } IInfo = Info; BBInfo.HasClobber |= IInfo.IsClobber; - BBInfo.HasWrites |= - (IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Write && - !IInfo.AccessRanges.empty()); - return !IInfo.AccessRanges.empty(); + bool InfoHasWrites = + IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Write && + !IInfo.AccessRanges.empty(); + BBInfo.HasWrites |= InfoHasWrites; + return InfoHasWrites; }; // No need for a visited set because we don't look through phis, so there are // no cycles. while (!Worklist.empty()) { - ArgumentUse IU = Worklist.pop_back_val(); - User *U = IU.U->getUser(); + ArgumentUse ArgUse = Worklist.pop_back_val(); + User *U = ArgUse.U->getUser(); // Add GEP uses to worklist. - // If the GEP is not a constant GEP, set IsInitialize to false. + // If the GEP is not a constant GEP, set the ArgumentUse::Offset to nullopt. if (auto *GEP = dyn_cast(U)) { APInt Offset(PointerSize, 0, /*isSigned=*/true); bool IsConstGEP = GEP->accumulateConstantOffset(DL, Offset); std::optional NewOffset = std::nullopt; - if (IsConstGEP && IU.Offset.has_value()) { - NewOffset = *IU.Offset + Offset.getSExtValue(); - } + if (IsConstGEP && ArgUse.Offset) + NewOffset = *ArgUse.Offset + Offset.getSExtValue(); for (Use &U : GEP->uses()) Worklist.push_back({&U, NewOffset}); continue; } auto *I = cast(U); - bool HasWrite = UpdateUseInfo(I, GetArgmentAccessInfo(I, IU, DL)); + bool HasWrite = UpdateUseInfo(I, GetArgmentAccessInfo(I, ArgUse, DL)); HasAnyWrite |= HasWrite; - if (HasWrite && I->getParent() != &EntryBB) { + if (HasWrite && I->getParent() != &EntryBB) HasWriteOutsideEntryBB = true; - } } return {HasAnyWrite, HasWriteOutsideEntryBB}; } @@ -1068,7 +1075,7 @@ static bool addAccessAttr(Argument *A, Attribute::AttrKind R) { } static bool inferInitializes(Argument &A, Function &F) { - DenseMap UsesPerBlock; + SmallDenseMap UsesPerBlock; auto [HasAnyWrite, HasWriteOutsideEntryBB] = CollectArgumentUsesPerBlock(A, F, UsesPerBlock); // No write anywhere in the function, bail. @@ -1119,18 +1126,16 @@ static bool inferInitializes(Argument &A, Function &F) { // From the end of the block to the beginning of the block, set // initializes ranges. - for (auto [_, Info] : reverse(Insts)) { - if (Info.IsClobber) { + for (auto &[_, Info] : reverse(Insts)) { + if (Info.IsClobber) CRL = ConstantRangeList(); - } if (!Info.AccessRanges.empty()) { if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Write) { CRL = CRL.unionWith(Info.AccessRanges); } else { assert(Info.ArgAccessType == ArgumentAccessInfo::AccessType::Read); - for (const auto &ReadRange : Info.AccessRanges) { + for (const auto &ReadRange : Info.AccessRanges) CRL.subtract(ReadRange); - } } } } @@ -1142,31 +1147,26 @@ static bool inferInitializes(Argument &A, Function &F) { // If all write instructions are in the EntryBB, or if the EntryBB has // a clobbering use, we only need to look at EntryBB. bool OnlyScanEntryBlock = !HasWriteOutsideEntryBB; - if (!OnlyScanEntryBlock) { + if (!OnlyScanEntryBlock) if (auto EntryUPB = UsesPerBlock.find(&EntryBB); - EntryUPB != UsesPerBlock.end()) { + EntryUPB != UsesPerBlock.end()) OnlyScanEntryBlock = EntryUPB->second.HasClobber; - } - } if (OnlyScanEntryBlock) { EntryCRL = VisitBlock(&EntryBB); - if (EntryCRL.empty()) { + if (EntryCRL.empty()) return false; - } } else { // Visit successors before predecessors with a post-order walk of the // blocks. for (const BasicBlock *BB : post_order(&F)) { ConstantRangeList CRL = VisitBlock(BB); - if (!CRL.empty()) { + if (!CRL.empty()) Initialized[BB] = CRL; - } } auto EntryCRLI = Initialized.find(&EntryBB); - if (EntryCRLI == Initialized.end()) { + if (EntryCRLI == Initialized.end()) return false; - } EntryCRL = EntryCRLI->second; } @@ -1177,9 +1177,8 @@ static bool inferInitializes(Argument &A, Function &F) { if (A.hasAttribute(Attribute::Initializes)) { ConstantRangeList PreviousCRL = A.getAttribute(Attribute::Initializes).getValueAsConstantRangeList(); - if (PreviousCRL == EntryCRL) { + if (PreviousCRL == EntryCRL) return false; - } EntryCRL = EntryCRL.unionWith(PreviousCRL); } @@ -2172,6 +2171,26 @@ deriveAttrsInPostOrder(ArrayRef Functions, AARGetterT &&AARGetter, SmallSet Changed; if (ArgAttrsOnly) { + // To get precise function attributes fastly, the main postorder CGSCC + // pipeline runs PostOrderFunctionAttrsPass twice, and the function + // simplification pipeline is scheduled in the middle. + // + // The first run deduces function attributes that could affect the function + // simplification pipeline, which is only the case with recursive functions. + // For non-recursive functions, it only infers argument attributes. + // The second run deduces any function attributes based on the fully + // simplified function + // + // PostOrderFunctionAttrsPass operates the call graph in "bottom-up" way: + // PostOrderFunctionAttrsPass(callee, ArgAttrsOnly) -> + // FunctionSimplificationPipeline {DSE(callee), ...} -> + // PostOrderFunctionAttrsPass2(callee) -> + // PostOrderFunctionAttrsPass(caller, ArgAttrsOnly) -> + // FunctionSimplificationPipeline {DSE(caller), ...} -> + // PostOrderFunctionAttrsPass2(caller) + // Only infer the "initializes" attribute in the 2nd run to get a precise + // attribute of callee which would be used to simplify callers in the + // function simplification pipeline (like DSE). addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/true); return Changed; } diff --git a/llvm/test/Transforms/FunctionAttrs/initializes.ll b/llvm/test/Transforms/FunctionAttrs/initializes.ll index d4bfbb9b34c4d..e17c2038d3a42 100644 --- a/llvm/test/Transforms/FunctionAttrs/initializes.ll +++ b/llvm/test/Transforms/FunctionAttrs/initializes.ll @@ -288,6 +288,21 @@ define void @negative_offset(ptr %p) { ret void } +define void @non_const_gep(ptr %p, i64 %i) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @non_const_gep( +; CHECK-SAME: ptr nocapture writeonly initializes((0, 8)) [[P:%.*]], i64 [[I:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]] +; CHECK-NEXT: store i64 123, ptr [[G]], align 4 +; CHECK-NEXT: store i64 123, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + %g = getelementptr i8, ptr %p, i64 %i + store i64 123, ptr %g + store i64 123, ptr %p + ret void +} + define void @call_clobber_in_entry_block(ptr %p, i1 %i) { ; CHECK-LABEL: define void @call_clobber_in_entry_block( ; CHECK-SAME: ptr [[P:%.*]], i1 [[I:%.*]]) { @@ -343,8 +358,8 @@ define void @call_initializes_clobber(ptr %p) { ret void } -define void @call_initializes_no_clobber_writeonly_capture(ptr %p) { -; CHECK-LABEL: define void @call_initializes_no_clobber_writeonly_capture( +define void @call_initializes_no_clobber_writeonly_nocapture(ptr %p) { +; CHECK-LABEL: define void @call_initializes_no_clobber_writeonly_nocapture( ; CHECK-SAME: ptr initializes((0, 4), (8, 12)) [[P:%.*]]) { ; CHECK-NEXT: call void @g3(ptr [[P]]) ; CHECK-NEXT: call void @g2(ptr [[P]]) @@ -404,6 +419,17 @@ define void @memset_volatile(ptr %p) { ret void } +define void @memset_non_constant(ptr %p, i64 %i) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @memset_non_constant( +; CHECK-SAME: ptr nocapture writeonly [[P:%.*]], i64 [[I:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[P]], i8 2, i64 [[I]], i1 false) +; CHECK-NEXT: ret void +; + call void @llvm.memset(ptr %p, i8 2, i64 %i, i1 false) + ret void +} + declare void @llvm.memcpy(ptr, ptr, i64 ,i1) define void @memcpy(ptr %p, ptr %p2) { @@ -456,17 +482,76 @@ define void @memcpy_src(ptr %p, ptr %p2) { ret void } -define void @non_const_gep(ptr %p, i64 %i) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) -; CHECK-LABEL: define void @non_const_gep( -; CHECK-SAME: ptr nocapture writeonly initializes((0, 8)) [[P:%.*]], i64 [[I:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]] -; CHECK-NEXT: store i64 123, ptr [[G]], align 4 -; CHECK-NEXT: store i64 123, ptr [[P]], align 4 +define void @memcpy_non_constant(ptr %p, ptr %p2, i64 %i) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @memcpy_non_constant( +; CHECK-SAME: ptr nocapture writeonly [[P:%.*]], ptr nocapture readonly [[P2:%.*]], i64 [[I:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[P]], ptr [[P2]], i64 [[I]], i1 false) ; CHECK-NEXT: ret void ; - %g = getelementptr i8, ptr %p, i64 %i - store i64 123, ptr %g - store i64 123, ptr %p + call void @llvm.memcpy(ptr %p, ptr %p2, i64 %i, i1 false) + ret void +} + +declare void @llvm.memmove(ptr, ptr, i64 ,i1) + +define void @memmove(ptr %p, ptr %p2) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @memmove( +; CHECK-SAME: ptr nocapture writeonly initializes((0, 9)) [[P:%.*]], ptr nocapture readonly [[P2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[P]], ptr [[P2]], i64 9, i1 false) +; CHECK-NEXT: ret void +; + call void @llvm.memmove(ptr %p, ptr %p2, i64 9, i1 false) + ret void +} + +define void @memmove_volatile(ptr %p, ptr %p2) { +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @memmove_volatile( +; CHECK-SAME: ptr writeonly [[P:%.*]], ptr readonly [[P2:%.*]]) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[P]], ptr [[P2]], i64 9, i1 true) +; CHECK-NEXT: ret void +; + call void @llvm.memmove(ptr %p, ptr %p2, i64 9, i1 true) + ret void +} + +define void @memmove_offset(ptr %p, ptr %p2) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @memmove_offset( +; CHECK-SAME: ptr nocapture writeonly initializes((3, 12)) [[P:%.*]], ptr nocapture readonly [[P2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[G]], ptr [[P2]], i64 9, i1 false) +; CHECK-NEXT: ret void +; + %g = getelementptr i8, ptr %p, i64 3 + call void @llvm.memmove(ptr %g, ptr %p2, i64 9, i1 false) + ret void +} + +define void @memmove_src(ptr %p, ptr %p2) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @memmove_src( +; CHECK-SAME: ptr nocapture initializes((96, 128)) [[P:%.*]], ptr nocapture initializes((0, 96)) [[P2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[P2]], ptr [[P]], i64 96, i1 false) +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 64 +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[G]], ptr [[P2]], i64 64, i1 false) +; CHECK-NEXT: ret void +; + call void @llvm.memmove(ptr %p2, ptr %p, i64 96, i1 false) + %g = getelementptr i8, ptr %p, i64 64 + call void @llvm.memmove(ptr %g, ptr %p2, i64 64, i1 false) + ret void +} + +define void @memmove_non_constant(ptr %p, ptr %p2, i64 %i) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @memmove_non_constant( +; CHECK-SAME: ptr nocapture writeonly [[P:%.*]], ptr nocapture readonly [[P2:%.*]], i64 [[I:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[P]], ptr [[P2]], i64 [[I]], i1 false) +; CHECK-NEXT: ret void +; + call void @llvm.memmove(ptr %p, ptr %p2, i64 %i, i1 false) ret void } From 07f73dd7645d4d904c6a857e4fd402f093c4aed2 Mon Sep 17 00:00:00 2001 From: Haopeng Liu Date: Mon, 8 Jul 2024 22:34:43 +0000 Subject: [PATCH 03/11] Update a comment --- llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 76e10a88a9444..4d6ad67fb7fbf 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -583,7 +583,8 @@ struct ArgumentUsesTracker : public CaptureTracker { }; // A struct of argument use: a Use and the offset it accesses. This struct -// is to track uses inside function via GEP. +// is to track uses inside function via GEP. If GEP has a non-constant index, +// the Offset field is nullopt. struct ArgumentUse { Use *U; std::optional Offset; From cb144c0c94711f77b5672e4d0910bd002a5ff6a7 Mon Sep 17 00:00:00 2001 From: Haopeng Liu Date: Mon, 8 Jul 2024 23:17:22 +0000 Subject: [PATCH 04/11] Change UsesPerBlockInfo::Insts to SmallDenseMap<..., 2> --- llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 4d6ad67fb7fbf..3e5759b4a4f16 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -604,7 +604,7 @@ struct ArgumentAccessInfo { }; struct UsesPerBlockInfo { - DenseMap Insts; + SmallDenseMap Insts; bool HasWrites = false; bool HasClobber = false; }; From 9585684d54abf9eb4393a9f919e33e7f006abacf Mon Sep 17 00:00:00 2001 From: Haopeng Liu Date: Mon, 8 Jul 2024 23:26:03 +0000 Subject: [PATCH 05/11] Remove else after if-return --- llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 3e5759b4a4f16..39267d4ca8d5d 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -655,8 +655,7 @@ ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, return {ArgumentAccessInfo::AccessType::Read, /*IsClobber=*/false, {*TypeAccessRange}}; - else - return {ArgumentAccessInfo::AccessType::Read, /*IsClobber=*/true, {}}; + return {ArgumentAccessInfo::AccessType::Read, /*IsClobber=*/true, {}}; } } else if (auto *MemSet = dyn_cast(I)) { if (!MemSet->isVolatile()) { @@ -682,11 +681,7 @@ ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, return {ArgumentAccessInfo::AccessType::Read, /*IsClobber=*/false, {*AccessRange}}; - else - return {ArgumentAccessInfo::AccessType::Read, /*IsClobber=*/true, {}}; - } else { - return { - ArgumentAccessInfo::AccessType::Unknown, /*IsClobber=*/true, {}}; + return {ArgumentAccessInfo::AccessType::Read, /*IsClobber=*/true, {}}; } } } else if (auto *CB = dyn_cast(I)) { From 0863cce6fed6d3b877ece0eaab8424c548a04555 Mon Sep 17 00:00:00 2001 From: Haopeng Liu Date: Tue, 9 Jul 2024 17:42:08 +0000 Subject: [PATCH 06/11] Fix the memprof_internal_linkage.ll test --- .../Transforms/PGOProfile/memprof_internal_linkage.ll | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/test/Transforms/PGOProfile/memprof_internal_linkage.ll b/llvm/test/Transforms/PGOProfile/memprof_internal_linkage.ll index 6122eef8a20d9..40cb25c62d724 100644 --- a/llvm/test/Transforms/PGOProfile/memprof_internal_linkage.ll +++ b/llvm/test/Transforms/PGOProfile/memprof_internal_linkage.ll @@ -12,8 +12,7 @@ ; RUN: opt < %s -passes='memprof-use' -S | FileCheck %s ; CHECK: call {{.*}} @_Znam{{.*}} #[[ATTR:[0-9]+]] -; old: attributes #[[ATTR]] = { builtin allocsize(0) "memprof"="notcold" } -; CHECK: attributes #[[ATTR]] = { builtin allocsize(0) } +; CHECK: attributes #[[ATTR]] = { builtin allocsize(0) "memprof"="notcold" } ; ModuleID = 'memprof_internal_linkage.cc' source_filename = "memprof_internal_linkage.cc" @@ -29,12 +28,12 @@ entry: store i32 0, ptr %retval, align 4 store i32 %argc, ptr %argc.addr, align 4 store ptr %argv, ptr %argv.addr, align 8 - call void @_ZL3foov.__uniq.50354172613129440706982166615384819716() #4, !dbg !14 + call void @_ZL3foov.__uniq.246575255519150625886541854978321354160() #4, !dbg !14 ret i32 0, !dbg !15 } ; Function Attrs: mustprogress noinline optnone uwtable -define internal void @_ZL3foov.__uniq.50354172613129440706982166615384819716() #1 !dbg !16 { +define internal void @_ZL3foov.__uniq.246575255519150625886541854978321354160() #1 !dbg !16 { entry: %a = alloca ptr, align 8 %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 20) #5, !dbg !17 @@ -77,7 +76,7 @@ attributes #5 = { builtin allocsize(0) } !13 = !{} !14 = !DILocation(line: 8, column: 3, scope: !10) !15 = !DILocation(line: 9, column: 3, scope: !10) -!16 = distinct !DISubprogram(name: "foo", linkageName: "_ZL3foov.__uniq.50354172613129440706982166615384819716", scope: !11, file: !11, line: 3, type: !12, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !0) +!16 = distinct !DISubprogram(name: "foo", linkageName: "_ZL3foov.__uniq.246575255519150625886541854978321354160", scope: !11, file: !11, line: 3, type: !12, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !0) !17 = !DILocation(line: 4, column: 12, scope: !16) !18 = !DILocation(line: 4, column: 8, scope: !16) !19 = !DILocation(line: 5, column: 10, scope: !16) From e9578c69bf1d9be3df0620181b93a050824387b7 Mon Sep 17 00:00:00 2001 From: Haopeng Liu Date: Tue, 9 Jul 2024 19:51:57 +0000 Subject: [PATCH 07/11] Update the SmallDenseMap size to 4 and 16 --- llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 39267d4ca8d5d..bd99733a7cecf 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -604,7 +604,7 @@ struct ArgumentAccessInfo { }; struct UsesPerBlockInfo { - SmallDenseMap Insts; + SmallDenseMap Insts; bool HasWrites = false; bool HasClobber = false; }; @@ -716,7 +716,7 @@ ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, // block in "F", which will be used to simplify the inference for simple cases. std::pair CollectArgumentUsesPerBlock( Argument &A, Function &F, - SmallDenseMap &UsesPerBlock) { + SmallDenseMap &UsesPerBlock) { auto &DL = F.getParent()->getDataLayout(); auto PointerSize = DL.getIndexSizeInBits(A.getType()->getPointerAddressSpace()); @@ -1071,7 +1071,7 @@ static bool addAccessAttr(Argument *A, Attribute::AttrKind R) { } static bool inferInitializes(Argument &A, Function &F) { - SmallDenseMap UsesPerBlock; + SmallDenseMap UsesPerBlock; auto [HasAnyWrite, HasWriteOutsideEntryBB] = CollectArgumentUsesPerBlock(A, F, UsesPerBlock); // No write anywhere in the function, bail. From 21debb58cd9459e1a3e6a74620bbdb744c6dfb69 Mon Sep 17 00:00:00 2001 From: Haopeng Liu Date: Tue, 16 Jul 2024 04:27:44 +0000 Subject: [PATCH 08/11] Exclude volatile Store/Load while inferring initializes attr --- llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 4 ++-- llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll | 2 +- llvm/test/Transforms/FunctionAttrs/initializes.ll | 2 +- llvm/test/Transforms/FunctionAttrs/nocapture.ll | 2 +- llvm/test/Transforms/FunctionAttrs/writeonly.ll | 2 +- llvm/test/Transforms/PhaseOrdering/memcpy-offset.ll | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index bd99733a7cecf..8b3eb47dcadee 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -634,7 +634,7 @@ ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, return std::nullopt; }; if (auto *SI = dyn_cast(I)) { - if (&SI->getOperandUse(1) == ArgUse.U) { + if (!SI->isVolatile() && &SI->getOperandUse(1) == ArgUse.U) { // Get the fixed type size of "SI". Since the access range of a write // will be unioned, if "SI" doesn't have a fixed type size, we just set // the access range to empty. @@ -646,7 +646,7 @@ ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, /*IsClobber=*/false, AccessRanges}; } } else if (auto *LI = dyn_cast(I)) { - if (&LI->getOperandUse(0) == ArgUse.U) { + if (!LI->isVolatile() && &LI->getOperandUse(0) == ArgUse.U) { // Get the fixed type size of "LI". Different from Write, if "LI" // doesn't have a fixed type size, we conservatively set as a clobber // with an empty access range. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll index 9bc0c9974865e..07587eaacd703 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll @@ -914,7 +914,7 @@ entry: define void @sincos_f32_repeated_uses(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define void @sincos_f32_repeated_uses -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) [[SIN_OUT:%.*]], ptr addrspace(1) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) diff --git a/llvm/test/Transforms/FunctionAttrs/initializes.ll b/llvm/test/Transforms/FunctionAttrs/initializes.ll index e17c2038d3a42..0243c9113df7b 100644 --- a/llvm/test/Transforms/FunctionAttrs/initializes.ll +++ b/llvm/test/Transforms/FunctionAttrs/initializes.ll @@ -130,7 +130,7 @@ define void @store_offset(ptr %p) { define void @store_volatile(ptr %p) { ; CHECK: Function Attrs: nofree norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) ; CHECK-LABEL: define void @store_volatile( -; CHECK-SAME: ptr initializes((8, 12)) [[P:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 8 ; CHECK-NEXT: store volatile i32 123, ptr [[G]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/FunctionAttrs/nocapture.ll b/llvm/test/Transforms/FunctionAttrs/nocapture.ll index 467f1e7c692fa..7df6132ac6a31 100644 --- a/llvm/test/Transforms/FunctionAttrs/nocapture.ll +++ b/llvm/test/Transforms/FunctionAttrs/nocapture.ll @@ -629,7 +629,7 @@ define void @test_atomicrmw(ptr %p) { define void @test_volatile(ptr %x) { ; FNATTRS: Function Attrs: nofree norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) ; FNATTRS-LABEL: define void @test_volatile -; FNATTRS-SAME: (ptr initializes((4, 8)) [[X:%.*]]) #[[ATTR12:[0-9]+]] { +; FNATTRS-SAME: (ptr [[X:%.*]]) #[[ATTR12:[0-9]+]] { ; FNATTRS-NEXT: entry: ; FNATTRS-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[X]], i64 1 ; FNATTRS-NEXT: store volatile i32 0, ptr [[GEP]], align 4 diff --git a/llvm/test/Transforms/FunctionAttrs/writeonly.ll b/llvm/test/Transforms/FunctionAttrs/writeonly.ll index a1f4b0b6e5b1d..ba546aff6e621 100644 --- a/llvm/test/Transforms/FunctionAttrs/writeonly.ll +++ b/llvm/test/Transforms/FunctionAttrs/writeonly.ll @@ -147,7 +147,7 @@ define void @test_readwrite(ptr %p) { define void @test_volatile(ptr %p) { ; FNATTRS: Function Attrs: nofree norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) ; FNATTRS-LABEL: define {{[^@]+}}@test_volatile -; FNATTRS-SAME: (ptr initializes((0, 1)) [[P:%.*]]) #[[ATTR6:[0-9]+]] { +; FNATTRS-SAME: (ptr [[P:%.*]]) #[[ATTR6:[0-9]+]] { ; FNATTRS-NEXT: store volatile i8 0, ptr [[P]], align 1 ; FNATTRS-NEXT: ret void ; diff --git a/llvm/test/Transforms/PhaseOrdering/memcpy-offset.ll b/llvm/test/Transforms/PhaseOrdering/memcpy-offset.ll index bd910b82496fd..5e6eab9d80736 100644 --- a/llvm/test/Transforms/PhaseOrdering/memcpy-offset.ll +++ b/llvm/test/Transforms/PhaseOrdering/memcpy-offset.ll @@ -10,7 +10,7 @@ define void @memcpy_forward_back_with_offset(ptr %arg) { ; CUSTOM-NEXT: ret void ; ; O2-LABEL: define void @memcpy_forward_back_with_offset( -; O2-SAME: ptr nocapture writeonly [[ARG:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; O2-SAME: ptr nocapture writeonly initializes((0, 1)) [[ARG:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; O2-NEXT: store i8 1, ptr [[ARG]], align 1 ; O2-NEXT: ret void ; From b1841e198c9fc4a3d75e7d902e1813749d378716 Mon Sep 17 00:00:00 2001 From: Haopeng Liu Date: Fri, 19 Jul 2024 00:10:32 +0000 Subject: [PATCH 09/11] Update CollectArgumentUsesPerBlock() and comments --- llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 88 ++++++++----------- .../Transforms/FunctionAttrs/initializes.ll | 32 ++++++- 2 files changed, 65 insertions(+), 55 deletions(-) diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 8b3eb47dcadee..3b189842ba22a 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -603,12 +603,20 @@ struct ArgumentAccessInfo { ConstantRangeList AccessRanges; }; +// A struct to wrap the argument use info per block. struct UsesPerBlockInfo { SmallDenseMap Insts; bool HasWrites = false; bool HasClobber = false; }; +// A struct to summarize the argument use info in a function. +struct ArgumentUsesSummary { + bool HasAnyWrite = false; + bool HasWriteOutsideEntryBB = false; + SmallDenseMap UsesPerBlock; +}; + ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, const ArgumentUse &ArgUse, const DataLayout &DL) { @@ -646,7 +654,8 @@ ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, /*IsClobber=*/false, AccessRanges}; } } else if (auto *LI = dyn_cast(I)) { - if (!LI->isVolatile() && &LI->getOperandUse(0) == ArgUse.U) { + if (!LI->isVolatile()) { + assert(&LI->getOperandUse(0) == ArgUse.U); // Get the fixed type size of "LI". Different from Write, if "LI" // doesn't have a fixed type size, we conservatively set as a clobber // with an empty access range. @@ -695,9 +704,6 @@ ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, ConstantRangeList AccessRanges; if (IsInitialize && ArgUse.Offset) { Attribute Attr = CB->getParamAttr(ArgNo, Attribute::Initializes); - if (!Attr.isValid()) - Attr = CB->getCalledFunction()->getParamAttribute( - ArgNo, Attribute::Initializes); ConstantRangeList CBCRL = Attr.getValueAsConstantRangeList(); for (ConstantRange &CR : CBCRL) AccessRanges.insert(ConstantRange(CR.getLower() + *ArgUse.Offset, @@ -710,19 +716,12 @@ ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, return {ArgumentAccessInfo::AccessType::Unknown, /*IsClobber=*/true, {}}; } -// Collect the uses of argument "A" in "F" and store the uses info per block to -// "UsesPerBlock". Return a pair of bool that indicate whether there is any -// write access, and whether there is any write access outside of the entry -// block in "F", which will be used to simplify the inference for simple cases. -std::pair CollectArgumentUsesPerBlock( - Argument &A, Function &F, - SmallDenseMap &UsesPerBlock) { +// Collect the uses of argument "A" in "F". +ArgumentUsesSummary CollectArgumentUsesPerBlock(Argument &A, Function &F) { auto &DL = F.getParent()->getDataLayout(); auto PointerSize = DL.getIndexSizeInBits(A.getType()->getPointerAddressSpace()); - - bool HasAnyWrite = false; - bool HasWriteOutsideEntryBB = false; + ArgumentUsesSummary Result; BasicBlock &EntryBB = F.getEntryBlock(); SmallVector Worklist; @@ -731,10 +730,9 @@ std::pair CollectArgumentUsesPerBlock( // Update "UsesPerBlock" with the block of "I" as key and "Info" as value. // Return true if the block of "I" has write accesses after updating. - auto UpdateUseInfo = [&UsesPerBlock](Instruction *I, - ArgumentAccessInfo Info) { + auto UpdateUseInfo = [&Result](Instruction *I, ArgumentAccessInfo Info) { auto *BB = I->getParent(); - auto &BBInfo = UsesPerBlock.getOrInsertDefault(BB); + auto &BBInfo = Result.UsesPerBlock.getOrInsertDefault(BB); bool AlreadyVisitedInst = BBInfo.Insts.contains(I); auto &IInfo = BBInfo.Insts[I]; @@ -776,12 +774,12 @@ std::pair CollectArgumentUsesPerBlock( auto *I = cast(U); bool HasWrite = UpdateUseInfo(I, GetArgmentAccessInfo(I, ArgUse, DL)); - HasAnyWrite |= HasWrite; + Result.HasAnyWrite |= HasWrite; if (HasWrite && I->getParent() != &EntryBB) - HasWriteOutsideEntryBB = true; + Result.HasWriteOutsideEntryBB = true; } - return {HasAnyWrite, HasWriteOutsideEntryBB}; + return Result; } } // end anonymous namespace @@ -1071,23 +1069,21 @@ static bool addAccessAttr(Argument *A, Attribute::AttrKind R) { } static bool inferInitializes(Argument &A, Function &F) { - SmallDenseMap UsesPerBlock; - auto [HasAnyWrite, HasWriteOutsideEntryBB] = - CollectArgumentUsesPerBlock(A, F, UsesPerBlock); + auto ArgumentUses = CollectArgumentUsesPerBlock(A, F); // No write anywhere in the function, bail. - if (!HasAnyWrite) + if (!ArgumentUses.HasAnyWrite) return false; + auto &UsesPerBlock = ArgumentUses.UsesPerBlock; BasicBlock &EntryBB = F.getEntryBlock(); + // A map to store the argument ranges initialized by a BasicBlock (including + // its successors). DenseMap Initialized; + // Visit the successors of "BB" block and the instructions in BB (post-order) + // to get the argument ranges initialized by "BB" (including its successors). + // The result will be cached in "Initialized". auto VisitBlock = [&](const BasicBlock *BB) -> ConstantRangeList { auto UPB = UsesPerBlock.find(BB); - - // If this block has uses and none are writes, the argument is not - // initialized in this block. - if (UPB != UsesPerBlock.end() && !UPB->second.HasWrites) - return ConstantRangeList(); - ConstantRangeList CRL; // Start with intersection of successors. @@ -1142,7 +1138,7 @@ static bool inferInitializes(Argument &A, Function &F) { ConstantRangeList EntryCRL; // If all write instructions are in the EntryBB, or if the EntryBB has // a clobbering use, we only need to look at EntryBB. - bool OnlyScanEntryBlock = !HasWriteOutsideEntryBB; + bool OnlyScanEntryBlock = !ArgumentUses.HasWriteOutsideEntryBB; if (!OnlyScanEntryBlock) if (auto EntryUPB = UsesPerBlock.find(&EntryBB); EntryUPB != UsesPerBlock.end()) @@ -1152,8 +1148,11 @@ static bool inferInitializes(Argument &A, Function &F) { if (EntryCRL.empty()) return false; } else { - // Visit successors before predecessors with a post-order walk of the - // blocks. + // Now we have to go through CFG to get the initialized argument ranges + // across blocks. With dominance and post-dominance, the initialized ranges + // by a block include both accesses inside this block and accesses in its + // (transitive) successors. So visit successors before predecessors with a + // post-order walk of the blocks and memorize the results in "Initialized". for (const BasicBlock *BB : post_order(&F)) { ConstantRangeList CRL = VisitBlock(BB); if (!CRL.empty()) @@ -2167,26 +2166,9 @@ deriveAttrsInPostOrder(ArrayRef Functions, AARGetterT &&AARGetter, SmallSet Changed; if (ArgAttrsOnly) { - // To get precise function attributes fastly, the main postorder CGSCC - // pipeline runs PostOrderFunctionAttrsPass twice, and the function - // simplification pipeline is scheduled in the middle. - // - // The first run deduces function attributes that could affect the function - // simplification pipeline, which is only the case with recursive functions. - // For non-recursive functions, it only infers argument attributes. - // The second run deduces any function attributes based on the fully - // simplified function - // - // PostOrderFunctionAttrsPass operates the call graph in "bottom-up" way: - // PostOrderFunctionAttrsPass(callee, ArgAttrsOnly) -> - // FunctionSimplificationPipeline {DSE(callee), ...} -> - // PostOrderFunctionAttrsPass2(callee) -> - // PostOrderFunctionAttrsPass(caller, ArgAttrsOnly) -> - // FunctionSimplificationPipeline {DSE(caller), ...} -> - // PostOrderFunctionAttrsPass2(caller) - // Only infer the "initializes" attribute in the 2nd run to get a precise - // attribute of callee which would be used to simplify callers in the - // function simplification pipeline (like DSE). + // ArgAttrsOnly means to only infer attributes that may aid optimizations + // on the *current* function. "initializes" attribute is to aid + // optimizations (like DSE) on the callers, so skip "initializes" here. addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/true); return Changed; } diff --git a/llvm/test/Transforms/FunctionAttrs/initializes.ll b/llvm/test/Transforms/FunctionAttrs/initializes.ll index 0243c9113df7b..67e623a98fb90 100644 --- a/llvm/test/Transforms/FunctionAttrs/initializes.ll +++ b/llvm/test/Transforms/FunctionAttrs/initializes.ll @@ -156,10 +156,11 @@ define void @merge_store_ranges(ptr %p) { } define void @partially_overlapping_stores_branches(ptr %p, i1 %i) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define void @partially_overlapping_stores_branches( -; CHECK-SAME: ptr nocapture writeonly initializes((4, 8)) [[P:%.*]], i1 [[I:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr nocapture initializes((4, 8)) [[P:%.*]], i1 [[I:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]] ; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 4 ; CHECK-NEXT: br i1 [[I]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: @@ -172,6 +173,7 @@ define void @partially_overlapping_stores_branches(ptr %p, i1 %i) { ; CHECK-NEXT: ret void ; entry: + %a = load i32, ptr %p %g = getelementptr i8, ptr %p, i64 4 br i1 %i, label %bb1, label %bb2 bb1: @@ -382,6 +384,32 @@ define void @call_initializes_no_clobber_readnone_capture(ptr %p) { ret void } +define void @call_initializes_escape_bundle(ptr %p) { +; CHECK-LABEL: define void @call_initializes_escape_bundle( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: call void @g1(ptr [[P]]) [ "unknown"(ptr [[P]]) ] +; CHECK-NEXT: ret void +; + call void @g1(ptr %p) ["unknown"(ptr %p)] + ret void +} + +define void @access_bundle() { + %sink = alloca i64, align 8 + store i64 123, ptr %sink + ret void +} + +define void @call_operand_bundle(ptr %p) { +; CHECK-LABEL: define void @call_operand_bundle( +; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: call void @access_bundle() [ "unknown"(ptr [[P]]) ] +; CHECK-NEXT: ret void +; + call void @access_bundle() ["unknown"(ptr %p)] + ret void +} + declare void @llvm.memset(ptr, i8, i64 ,i1) define void @memset(ptr %p) { From 08ef40002f9ec4f23327573500bd33fe384c22cb Mon Sep 17 00:00:00 2001 From: Haopeng Liu Date: Fri, 19 Jul 2024 20:14:11 +0000 Subject: [PATCH 10/11] Update tests --- llvm/test/Transforms/FunctionAttrs/initializes.ll | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/llvm/test/Transforms/FunctionAttrs/initializes.ll b/llvm/test/Transforms/FunctionAttrs/initializes.ll index 67e623a98fb90..2aa8385fe4ca7 100644 --- a/llvm/test/Transforms/FunctionAttrs/initializes.ll +++ b/llvm/test/Transforms/FunctionAttrs/initializes.ll @@ -336,7 +336,6 @@ end: declare void @g1(ptr initializes((0, 4)) %p) declare void @g2(ptr initializes((8, 12)) %p) declare void @g3(ptr initializes((0, 4)) writeonly nocapture %p) -declare void @g4(ptr initializes((0, 4)) readnone nocapture %p) define void @call_initializes(ptr %p) { ; CHECK-LABEL: define void @call_initializes( @@ -372,18 +371,6 @@ define void @call_initializes_no_clobber_writeonly_nocapture(ptr %p) { ret void } -define void @call_initializes_no_clobber_readnone_capture(ptr %p) { -; CHECK-LABEL: define void @call_initializes_no_clobber_readnone_capture( -; CHECK-SAME: ptr initializes((0, 4), (8, 12)) [[P:%.*]]) { -; CHECK-NEXT: call void @g4(ptr [[P]]) -; CHECK-NEXT: call void @g2(ptr [[P]]) -; CHECK-NEXT: ret void -; - call void @g4(ptr %p) - call void @g2(ptr %p) - ret void -} - define void @call_initializes_escape_bundle(ptr %p) { ; CHECK-LABEL: define void @call_initializes_escape_bundle( ; CHECK-SAME: ptr [[P:%.*]]) { From eb2d9cf0261f1c4b58278070e1fc789abf75dac7 Mon Sep 17 00:00:00 2001 From: Haopeng Liu Date: Sun, 10 Nov 2024 05:33:52 +0000 Subject: [PATCH 11/11] Update and rename clobber code --- llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 110 +++++++++++----------- 1 file changed, 54 insertions(+), 56 deletions(-) diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 4019a5dc76f19..afb0ea72b269c 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -583,42 +583,39 @@ struct ArgumentUsesTracker : public CaptureTracker { const SCCNodeSet &SCCNodes; }; -// A struct of argument use: a Use and the offset it accesses. This struct -// is to track uses inside function via GEP. If GEP has a non-constant index, -// the Offset field is nullopt. +/// A struct of argument use: a Use and the offset it accesses. This struct +/// is to track uses inside function via GEP. If GEP has a non-constant index, +/// the Offset field is nullopt. struct ArgumentUse { Use *U; std::optional Offset; }; -// A struct of argument access info. "Unknown" accesses are the cases like -// unrecognized instructions, instructions that have more than one use of -// the argument, or volatile memory accesses. "Unknown" implies "IsClobber" -// and an empty access range. -// Write or Read accesses can be clobbers as well for example, a Load with -// scalable type. +/// A struct of argument access info. "Unknown" accesses are the cases like +/// unrecognized instructions, instructions that have more than one use of +/// the argument, or volatile memory accesses. "WriteWithSideEffect" are call +/// instructions that not only write an argument but also capture it. struct ArgumentAccessInfo { - enum class AccessType : uint8_t { Write, Read, Unknown }; + enum class AccessType : uint8_t { Write, WriteWithSideEffect, Read, Unknown }; AccessType ArgAccessType; - bool IsClobber = false; ConstantRangeList AccessRanges; }; -// A struct to wrap the argument use info per block. +/// A struct to wrap the argument use info per block. struct UsesPerBlockInfo { SmallDenseMap Insts; bool HasWrites = false; - bool HasClobber = false; + bool HasUnknownAccess = false; }; -// A struct to summarize the argument use info in a function. +/// A struct to summarize the argument use info in a function. struct ArgumentUsesSummary { bool HasAnyWrite = false; bool HasWriteOutsideEntryBB = false; SmallDenseMap UsesPerBlock; }; -ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, +ArgumentAccessInfo getArgmentAccessInfo(const Instruction *I, const ArgumentUse &ArgUse, const DataLayout &DL) { auto GetTypeAccessRange = @@ -643,7 +640,7 @@ ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, return std::nullopt; }; if (auto *SI = dyn_cast(I)) { - if (!SI->isVolatile() && &SI->getOperandUse(1) == ArgUse.U) { + if (SI->isSimple() && &SI->getOperandUse(1) == ArgUse.U) { // Get the fixed type size of "SI". Since the access range of a write // will be unioned, if "SI" doesn't have a fixed type size, we just set // the access range to empty. @@ -651,21 +648,17 @@ ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, if (auto TypeAccessRange = GetTypeAccessRange(SI->getAccessType(), ArgUse.Offset)) AccessRanges.insert(*TypeAccessRange); - return {ArgumentAccessInfo::AccessType::Write, - /*IsClobber=*/false, AccessRanges}; + return {ArgumentAccessInfo::AccessType::Write, std::move(AccessRanges)}; } } else if (auto *LI = dyn_cast(I)) { - if (!LI->isVolatile()) { + if (LI->isSimple()) { assert(&LI->getOperandUse(0) == ArgUse.U); // Get the fixed type size of "LI". Different from Write, if "LI" // doesn't have a fixed type size, we conservatively set as a clobber // with an empty access range. if (auto TypeAccessRange = GetTypeAccessRange(LI->getAccessType(), ArgUse.Offset)) - return {ArgumentAccessInfo::AccessType::Read, - /*IsClobber=*/false, - {*TypeAccessRange}}; - return {ArgumentAccessInfo::AccessType::Read, /*IsClobber=*/true, {}}; + return {ArgumentAccessInfo::AccessType::Read, {*TypeAccessRange}}; } } else if (auto *MemSet = dyn_cast(I)) { if (!MemSet->isVolatile()) { @@ -673,8 +666,7 @@ ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, if (auto AccessRange = GetConstantIntRange(MemSet->getLength(), ArgUse.Offset)) AccessRanges.insert(*AccessRange); - return {ArgumentAccessInfo::AccessType::Write, - /*IsClobber=*/false, AccessRanges}; + return {ArgumentAccessInfo::AccessType::Write, AccessRanges}; } } else if (auto *MTI = dyn_cast(I)) { if (!MTI->isVolatile()) { @@ -683,25 +675,23 @@ ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, if (auto AccessRange = GetConstantIntRange(MTI->getLength(), ArgUse.Offset)) AccessRanges.insert(*AccessRange); - return {ArgumentAccessInfo::AccessType::Write, - /*IsClobber=*/false, AccessRanges}; + return {ArgumentAccessInfo::AccessType::Write, AccessRanges}; } else if (&MTI->getOperandUse(1) == ArgUse.U) { if (auto AccessRange = GetConstantIntRange(MTI->getLength(), ArgUse.Offset)) - return {ArgumentAccessInfo::AccessType::Read, - /*IsClobber=*/false, - {*AccessRange}}; - return {ArgumentAccessInfo::AccessType::Read, /*IsClobber=*/true, {}}; + return {ArgumentAccessInfo::AccessType::Read, {*AccessRange}}; } } } else if (auto *CB = dyn_cast(I)) { if (CB->isArgOperand(ArgUse.U)) { unsigned ArgNo = CB->getArgOperandNo(ArgUse.U); bool IsInitialize = CB->paramHasAttr(ArgNo, Attribute::Initializes); - // Argument is only not clobbered when parameter is writeonly/readnone - // and nocapture. - bool IsClobber = !(CB->onlyWritesMemory(ArgNo) && - CB->paramHasAttr(ArgNo, Attribute::NoCapture)); + // Argument is a Write when parameter is writeonly/readnone + // and nocapture. Otherwise, it's a WriteWithSideEffect. + auto Access = CB->onlyWritesMemory(ArgNo) && + CB->paramHasAttr(ArgNo, Attribute::NoCapture) + ? ArgumentAccessInfo::AccessType::Write + : ArgumentAccessInfo::AccessType::WriteWithSideEffect; ConstantRangeList AccessRanges; if (IsInitialize && ArgUse.Offset) { Attribute Attr = CB->getParamAttr(ArgNo, Attribute::Initializes); @@ -709,18 +699,18 @@ ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, for (ConstantRange &CR : CBCRL) AccessRanges.insert(ConstantRange(CR.getLower() + *ArgUse.Offset, CR.getUpper() + *ArgUse.Offset)); - return {ArgumentAccessInfo::AccessType::Write, IsClobber, AccessRanges}; + return {Access, AccessRanges}; } } } - // Unrecognized instructions are considered clobbers. - return {ArgumentAccessInfo::AccessType::Unknown, /*IsClobber=*/true, {}}; + // Other unrecognized instructions are considered as unknown. + return {ArgumentAccessInfo::AccessType::Unknown, {}}; } // Collect the uses of argument "A" in "F". -ArgumentUsesSummary CollectArgumentUsesPerBlock(Argument &A, Function &F) { +ArgumentUsesSummary collectArgumentUsesPerBlock(Argument &A, Function &F) { auto &DL = F.getParent()->getDataLayout(); - auto PointerSize = + unsigned PointerSize = DL.getIndexSizeInBits(A.getType()->getPointerAddressSpace()); ArgumentUsesSummary Result; @@ -733,22 +723,25 @@ ArgumentUsesSummary CollectArgumentUsesPerBlock(Argument &A, Function &F) { // Return true if the block of "I" has write accesses after updating. auto UpdateUseInfo = [&Result](Instruction *I, ArgumentAccessInfo Info) { auto *BB = I->getParent(); - auto &BBInfo = Result.UsesPerBlock.getOrInsertDefault(BB); + auto &BBInfo = Result.UsesPerBlock[BB]; bool AlreadyVisitedInst = BBInfo.Insts.contains(I); auto &IInfo = BBInfo.Insts[I]; // Instructions that have more than one use of the argument are considered // as clobbers. if (AlreadyVisitedInst) { - IInfo = {ArgumentAccessInfo::AccessType::Unknown, /*IsClobber=*/true, {}}; - BBInfo.HasClobber = true; + IInfo = {ArgumentAccessInfo::AccessType::Unknown, {}}; + BBInfo.HasUnknownAccess = true; return false; } - IInfo = Info; - BBInfo.HasClobber |= IInfo.IsClobber; + IInfo = std::move(Info); + BBInfo.HasUnknownAccess |= + IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown; bool InfoHasWrites = - IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Write && + (IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Write || + IInfo.ArgAccessType == + ArgumentAccessInfo::AccessType::WriteWithSideEffect) && !IInfo.AccessRanges.empty(); BBInfo.HasWrites |= InfoHasWrites; return InfoHasWrites; @@ -762,18 +755,19 @@ ArgumentUsesSummary CollectArgumentUsesPerBlock(Argument &A, Function &F) { // Add GEP uses to worklist. // If the GEP is not a constant GEP, set the ArgumentUse::Offset to nullopt. if (auto *GEP = dyn_cast(U)) { - APInt Offset(PointerSize, 0, /*isSigned=*/true); - bool IsConstGEP = GEP->accumulateConstantOffset(DL, Offset); std::optional NewOffset = std::nullopt; - if (IsConstGEP && ArgUse.Offset) - NewOffset = *ArgUse.Offset + Offset.getSExtValue(); + if (ArgUse.Offset) { + APInt Offset(PointerSize, 0); + if (GEP->accumulateConstantOffset(DL, Offset)) + NewOffset = *ArgUse.Offset + Offset.getSExtValue(); + } for (Use &U : GEP->uses()) Worklist.push_back({&U, NewOffset}); continue; } auto *I = cast(U); - bool HasWrite = UpdateUseInfo(I, GetArgmentAccessInfo(I, ArgUse, DL)); + bool HasWrite = UpdateUseInfo(I, getArgmentAccessInfo(I, ArgUse, DL)); Result.HasAnyWrite |= HasWrite; @@ -1070,7 +1064,7 @@ static bool addAccessAttr(Argument *A, Attribute::AttrKind R) { } static bool inferInitializes(Argument &A, Function &F) { - auto ArgumentUses = CollectArgumentUsesPerBlock(A, F); + auto ArgumentUses = collectArgumentUsesPerBlock(A, F); // No write anywhere in the function, bail. if (!ArgumentUses.HasAnyWrite) return false; @@ -1091,7 +1085,7 @@ static bool inferInitializes(Argument &A, Function &F) { // If this block has any clobbering use, we're going to clear out the // ranges at some point in this block anyway, so don't bother looking at // successors. - if (UPB == UsesPerBlock.end() || !UPB->second.HasClobber) { + if (UPB == UsesPerBlock.end() || !UPB->second.HasUnknownAccess) { bool HasAddedSuccessor = false; for (auto *Succ : successors(BB)) { if (auto SuccI = Initialized.find(Succ); SuccI != Initialized.end()) { @@ -1120,10 +1114,14 @@ static bool inferInitializes(Argument &A, Function &F) { // From the end of the block to the beginning of the block, set // initializes ranges. for (auto &[_, Info] : reverse(Insts)) { - if (Info.IsClobber) + if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown || + Info.ArgAccessType == + ArgumentAccessInfo::AccessType::WriteWithSideEffect) CRL = ConstantRangeList(); if (!Info.AccessRanges.empty()) { - if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Write) { + if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Write || + Info.ArgAccessType == + ArgumentAccessInfo::AccessType::WriteWithSideEffect) { CRL = CRL.unionWith(Info.AccessRanges); } else { assert(Info.ArgAccessType == ArgumentAccessInfo::AccessType::Read); @@ -1143,7 +1141,7 @@ static bool inferInitializes(Argument &A, Function &F) { if (!OnlyScanEntryBlock) if (auto EntryUPB = UsesPerBlock.find(&EntryBB); EntryUPB != UsesPerBlock.end()) - OnlyScanEntryBlock = EntryUPB->second.HasClobber; + OnlyScanEntryBlock = EntryUPB->second.HasUnknownAccess; if (OnlyScanEntryBlock) { EntryCRL = VisitBlock(&EntryBB); if (EntryCRL.empty())