diff --git a/llvm/include/llvm/Analysis/AssumeBundleQueries.h b/llvm/include/llvm/Analysis/AssumeBundleQueries.h
index 0d39339c4f7d7..eb537e5f11afc 100644
--- a/llvm/include/llvm/Analysis/AssumeBundleQueries.h
+++ b/llvm/include/llvm/Analysis/AssumeBundleQueries.h
@@ -102,10 +102,14 @@ LLVM_ABI void fillMapFromAssume(AssumeInst &Assume,
 struct RetainedKnowledge {
   Attribute::AttrKind AttrKind = Attribute::None;
   uint64_t ArgValue = 0;
+  Value *IRArgValue = nullptr;
   Value *WasOn = nullptr;
+  RetainedKnowledge(Attribute::AttrKind AttrKind = Attribute::None,
+                    uint64_t ArgValue = 0, Value *WasOn = nullptr)
+      : AttrKind(AttrKind), ArgValue(ArgValue), WasOn(WasOn) {}
   bool operator==(RetainedKnowledge Other) const {
     return AttrKind == Other.AttrKind && WasOn == Other.WasOn &&
-           ArgValue == Other.ArgValue;
+           ArgValue == Other.ArgValue && IRArgValue == Other.IRArgValue;
   }
   bool operator!=(RetainedKnowledge Other) const { return !(*this == Other); }
   /// This is only intended for use in std::min/std::max between attribute that
diff --git a/llvm/lib/Analysis/AssumeBundleQueries.cpp b/llvm/lib/Analysis/AssumeBundleQueries.cpp
index 4c890a5d21b54..aa1cb3c26d63f 100644
--- a/llvm/lib/Analysis/AssumeBundleQueries.cpp
+++ b/llvm/lib/Analysis/AssumeBundleQueries.cpp
@@ -114,6 +114,9 @@ llvm::getKnowledgeFromBundle(AssumeInst &Assume,
   };
   if (BOI.End - BOI.Begin > ABA_Argument)
     Result.ArgValue = GetArgOr1(0);
+  Result.IRArgValue = bundleHasArgument(BOI, ABA_Argument)
+                          ? getValueFromBundleOpInfo(Assume, BOI, ABA_Argument)
+                          : nullptr;
   if (Result.AttrKind == Attribute::Alignment)
     if (BOI.End - BOI.Begin > ABA_Argument + 1)
       Result.ArgValue = MinAlign(Result.ArgValue, GetArgOr1(1));
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 880249588f0b2..89e42ff4e8029 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -31,6 +31,40 @@ static bool isAligned(const Value *Base, Align Alignment,
   return Base->getPointerAlignment(DL) >= Alignment;
 }
 
+static bool isDereferenceableAndAlignedPointerViaAssumption(
+    const Value *Ptr, Align Alignment,
+    function_ref<bool(const RetainedKnowledge &RK)> CheckSize,
+    const DataLayout &DL, const Instruction *CtxI, AssumptionCache *AC,
+    const DominatorTree *DT) {
+  // Dereferenceable information from assumptions is only valid if the value
+  // cannot be freed between the assumption and use. For now just use the
+  // information for values that cannot be freed in the function.
+  // TODO: More precisely check if the pointer can be freed between assumption
+  // and use.
+  if (!CtxI || Ptr->canBeFreed())
+    return false;
+  /// Look through assumes to see if both dereferenceability and alignment can
+  /// be proven by an assume if needed.
+  RetainedKnowledge AlignRK;
+  RetainedKnowledge DerefRK;
+  bool IsAligned = Ptr->getPointerAlignment(DL) >= Alignment;
+  return getKnowledgeForValue(
+      Ptr, {Attribute::Dereferenceable, Attribute::Alignment}, *AC,
+      [&](RetainedKnowledge RK, Instruction *Assume, auto) {
+        if (!isValidAssumeForContext(Assume, CtxI, DT))
+          return false;
+        if (RK.AttrKind == Attribute::Alignment)
+          AlignRK = std::max(AlignRK, RK);
+        if (RK.AttrKind == Attribute::Dereferenceable)
+          DerefRK = std::max(DerefRK, RK);
+        IsAligned |= AlignRK && AlignRK.ArgValue >= Alignment.value();
+        if (IsAligned && DerefRK && CheckSize(DerefRK))
+          return true; // We have found what we needed so we stop looking.
+        return false; // Other assumes may have better information, so
+                      // keep looking.
+      });
+}
+
 /// Test if V is always a pointer to allocated and suitably aligned memory for
 /// a simple load or store.
 static bool isDereferenceableAndAlignedPointer(
@@ -169,38 +203,12 @@ static bool isDereferenceableAndAlignedPointer(
                                               Size, DL, CtxI, AC, DT, TLI, Visited, MaxDepth);
 
-  // Dereferenceable information from assumptions is only valid if the value
-  // cannot be freed between the assumption and use. For now just use the
-  // information for values that cannot be freed in the function.
-  // TODO: More precisely check if the pointer can be freed between assumption
-  // and use.
-  if (CtxI && AC && !V->canBeFreed()) {
-    /// Look through assumes to see if both dereferencability and alignment can
-    /// be proven by an assume if needed.
-    RetainedKnowledge AlignRK;
-    RetainedKnowledge DerefRK;
-    bool IsAligned = V->getPointerAlignment(DL) >= Alignment;
-    if (getKnowledgeForValue(
-            V, {Attribute::Dereferenceable, Attribute::Alignment}, *AC,
-            [&](RetainedKnowledge RK, Instruction *Assume, auto) {
-              if (!isValidAssumeForContext(Assume, CtxI, DT))
-                return false;
-              if (RK.AttrKind == Attribute::Alignment)
-                AlignRK = std::max(AlignRK, RK);
-              if (RK.AttrKind == Attribute::Dereferenceable)
-                DerefRK = std::max(DerefRK, RK);
-              IsAligned |= AlignRK && AlignRK.ArgValue >= Alignment.value();
-              if (IsAligned && DerefRK &&
-                  DerefRK.ArgValue >= Size.getZExtValue())
-                return true; // We have found what we needed so we stop looking
-              return false; // Other assumes may have better information. so
-                            // keep looking
-            }))
-      return true;
-  }
-
-  // If we don't know, assume the worst.
-  return false;
+  return AC && isDereferenceableAndAlignedPointerViaAssumption(
+                   V, Alignment,
+                   [Size](const RetainedKnowledge &RK) {
+                     return RK.ArgValue >= Size.getZExtValue();
+                   },
+                   DL, CtxI, AC, DT);
 }
 
 bool llvm::isDereferenceableAndAlignedPointer(
@@ -317,8 +325,8 @@ bool llvm::isDereferenceableAndAlignedInLoop(
     return false;
 
   const SCEV *MaxBECount =
-      Predicates ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
-                 : SE.getConstantMaxBackedgeTakenCount(L);
+      Predicates ? SE.getPredicatedSymbolicMaxBackedgeTakenCount(L, *Predicates)
+                 : SE.getSymbolicMaxBackedgeTakenCount(L);
   const SCEV *BECount = Predicates
                             ? SE.getPredicatedBackedgeTakenCount(L, *Predicates)
                             : SE.getBackedgeTakenCount(L);
@@ -339,9 +347,11 @@ bool llvm::isDereferenceableAndAlignedInLoop(
 
   Value *Base = nullptr;
   APInt AccessSize;
+  const SCEV *AccessSizeSCEV = nullptr;
   if (const SCEVUnknown *NewBase = dyn_cast<SCEVUnknown>(AccessStart)) {
     Base = NewBase->getValue();
     AccessSize = MaxPtrDiff;
+    AccessSizeSCEV = PtrDiff;
   } else if (auto *MinAdd = dyn_cast<SCEVAddExpr>(AccessStart)) {
     if (MinAdd->getNumOperands() != 2)
       return false;
@@ -365,12 +375,20 @@ bool llvm::isDereferenceableAndAlignedInLoop(
       return false;
 
     AccessSize = MaxPtrDiff + Offset->getAPInt();
+    AccessSizeSCEV = SE.getAddExpr(PtrDiff, Offset);
     Base = NewBase->getValue();
   } else
     return false;
 
   Instruction *HeaderFirstNonPHI = &*L->getHeader()->getFirstNonPHIIt();
-  return isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL,
+  return isDereferenceableAndAlignedPointerViaAssumption(
+             Base, Alignment,
+             [&SE, AccessSizeSCEV](const RetainedKnowledge &RK) {
+               return SE.isKnownPredicate(CmpInst::ICMP_ULE, AccessSizeSCEV,
+                                          SE.getSCEV(RK.IRArgValue));
+             },
+             DL, HeaderFirstNonPHI, AC, &DT) ||
+         isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL,
                                             HeaderFirstNonPHI, AC, &DT);
 }
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index eb747bc48a8a5..8004077b92665 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5604,6 +5604,15 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
               "third argument should be an integer if present", Call);
         continue;
       }
+      if (Kind == Attribute::Dereferenceable) {
+        Check(ArgCount == 2,
+              "dereferenceable assumptions should have 2 arguments", Call);
+        Check(Call.getOperand(Elem.Begin)->getType()->isPointerTy(),
+              "first argument should be a pointer", Call);
+        Check(Call.getOperand(Elem.Begin + 1)->getType()->isIntegerTy(),
+              "second argument should be an integer", Call);
+        continue;
+      }
       Check(ArgCount <= 2, "too many arguments", Call);
       if (Kind == Attribute::None)
         break;
diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll
new file mode 100644
index 0000000000000..e771c408358a1
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll
@@ -0,0 +1,557 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -p loop-vectorize -force-vector-width=2 -S %s | FileCheck %s
+
+declare void @llvm.assume(i1)
+
+; %a is known dereferenceable via assume for the whole loop.
+define void @deref_assumption_in_preheader_non_constant_trip_count_access_i8(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync { +; CHECK-LABEL: define void @deref_assumption_in_preheader_non_constant_trip_count_access_i8( +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 [[N]]) ] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i8> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i8>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i8> [[WIDE_LOAD]], <2 x i8> [[WIDE_LOAD1]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0 +; CHECK-NEXT: store <2 x i8> [[PREDPHI]], ptr [[TMP7]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[L_B:%.*]] = load i8, ptr [[GEP_B]], align 1 +; CHECK-NEXT: [[C_1:%.*]] = icmp sge i8 [[L_B]], 0 +; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] +; CHECK: [[LOOP_THEN]]: +; CHECK-NEXT: [[L_A:%.*]] = load i8, ptr [[GEP_A]], align 1 +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[MERGE:%.*]] = phi i8 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] +; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[IV]] +; CHECK-NEXT: store i8 [[MERGE]], ptr [[GEP_C]], align 1 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + 
call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 %n) ] + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.a = getelementptr inbounds i8, ptr %a, i64 %iv + %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv + %l.b = load i8, ptr %gep.b, align 1 + %c.1 = icmp sge i8 %l.b, 0 + br i1 %c.1, label %loop.latch, label %loop.then + +loop.then: + %l.a = load i8, ptr %gep.a, align 1 + br label %loop.latch + +loop.latch: + %merge = phi i8 [ %l.a, %loop.then ], [ %l.b, %loop.header ] + %gep.c = getelementptr inbounds i8, ptr %c, i64 %iv + store i8 %merge, ptr %gep.c, align 1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %n + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} + +; %a is known dereferenceable via assume for the whole loop. +define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync { +; CHECK-LABEL: define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32( +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[N]], 4 +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 [[MUL]]) ] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP7]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 1 +; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 +; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] +; CHECK: [[LOOP_THEN]]: +; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 1 +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] +; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] +; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 1 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %mul = mul nsw nuw i64 %n, 4 + call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 %mul) ] + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv + %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv + %l.b = load i32, ptr %gep.b, align 1 + %c.1 = icmp sge i32 %l.b, 0 + br i1 %c.1, label %loop.latch, label %loop.then + +loop.then: + %l.a = load i32, ptr %gep.a, align 1 + br label %loop.latch + +loop.latch: + %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ] + %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv + store i32 %merge, ptr %gep.c, align 1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %n + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} + + +; %a is NOT known dereferenceable via assume for the whole loop. 
+define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_access_i32(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync { +; CHECK-LABEL: define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_access_i32( +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 [[N]]) ] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 +; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP0]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_CONTINUE2]]: +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP7]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 
[[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 1 +; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 +; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] +; CHECK: [[LOOP_THEN]]: +; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 1 +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] +; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] +; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 1 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 %n) ] + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv + %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv + %l.b = load i32, ptr %gep.b, align 1 + %c.1 = icmp sge i32 %l.b, 0 + br i1 %c.1, label %loop.latch, label %loop.then + +loop.then: + %l.a = load i32, ptr %gep.a, align 1 + br label %loop.latch + +loop.latch: + %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ] + %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv + store i32 %merge, ptr %gep.c, align 1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %n + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} + +; %a is NOT known dereferenceable via assume for the whole loop. 
+define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_access_i32(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync { +; CHECK-LABEL: define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_access_i32( +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 100) ] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 +; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP0]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_CONTINUE2]]: +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP7]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 
[[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT:    [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 1
+; CHECK-NEXT:    [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
+; CHECK-NEXT:    br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
+; CHECK:       [[LOOP_THEN]]:
+; CHECK-NEXT:    [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 1
+; CHECK-NEXT:    br label %[[LOOP_LATCH]]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
+; CHECK-NEXT:    store i32 [[MERGE]], ptr [[GEP_C]], align 1
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 100) ]
+  br label %loop.header
+
+loop.header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv
+  %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
+  %l.b = load i32, ptr %gep.b, align 1
+  %c.1 = icmp sge i32 %l.b, 0
+  br i1 %c.1, label %loop.latch, label %loop.then
+
+loop.then:
+  %l.a = load i32, ptr %gep.a, align 1
+  br label %loop.latch
+
+loop.latch:
+  %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ]
+  %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv
+  store i32 %merge, ptr %gep.c, align 1
+  %iv.next = add nuw nsw i64 %iv, 1
+  %ec = icmp eq i64 %iv.next, %n
+  br i1 %ec, label %exit, label %loop.header
+
+exit:
+  ret void
+}
+
+; %a is known dereferenceable via assume for the whole loop, alignment is known via argument attribute.
+define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32_align_attribute(ptr noalias noundef align 4 %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync { +; CHECK-LABEL: define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32_align_attribute( +; CHECK-SAME: ptr noalias noundef align 4 [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[N]], 4 +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 [[MUL]]) ] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP6]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 +; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] +; CHECK: [[LOOP_THEN]]: +; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] +; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] +; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 1 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label 
%[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %mul = mul nsw nuw i64 %n, 4 + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 %mul) ] + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv + %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv + %l.b = load i32, ptr %gep.b, align 4 + %c.1 = icmp sge i32 %l.b, 0 + br i1 %c.1, label %loop.latch, label %loop.then + +loop.then: + %l.a = load i32, ptr %gep.a, align 4 + br label %loop.latch + +loop.latch: + %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ] + %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv + store i32 %merge, ptr %gep.c, align 1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %n + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} + +; Alignment via argument attribute is too small (1 but needs 4). +define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32_align_attribute_too_small(ptr noalias noundef align 1 %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync { +; CHECK-LABEL: define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32_align_attribute_too_small( +; CHECK-SAME: ptr noalias noundef align 1 [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[N]], 4 +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 [[MUL]]) ] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 +; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = 
insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_CONTINUE2]]: +; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP17]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 +; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] +; CHECK: [[LOOP_THEN]]: +; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] +; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] +; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 1 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %mul = mul nsw nuw i64 %n, 4 + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 %mul) ] + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv + %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv + %l.b = load i32, ptr %gep.b, align 4 + %c.1 = icmp sge i32 %l.b, 0 + br i1 %c.1, label %loop.latch, label %loop.then + +loop.then: + %l.a = load i32, ptr %gep.a, align 4 + br label %loop.latch + +loop.latch: + %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ] + %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv + store i32 %merge, ptr %gep.c, align 1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %n + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll index 13fd2eff87290..84d5ceeb601b6 100644 --- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll +++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll @@ -165,6 +165,35 @@ exit: ret void } +define void @loop_contains_store_assumed_bounds(ptr 
noalias %array, ptr readonly %pred, i32 %n) { +; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_assumed_bounds' +; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. +entry: + %n_bytes = mul nuw nsw i32 %n, 2 + call void @llvm.assume(i1 true) [ "align"(ptr %pred, i64 2), "dereferenceable"(ptr %pred, i32 %n_bytes) ] + %tc = sext i32 %n to i64 + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ] + %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv + %data = load i16, ptr %st.addr, align 2 + %inc = add nsw i16 %data, 1 + store i16 %inc, ptr %st.addr, align 2 + %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv + %ee.val = load i16, ptr %ee.addr, align 2 + %ee.cond = icmp sgt i16 %ee.val, 500 + br i1 %ee.cond, label %exit, label %for.inc + +for.inc: + %iv.next = add nuw nsw i64 %iv, 1 + %counted.cond = icmp eq i64 %iv.next, %tc + br i1 %counted.cond, label %exit, label %for.body + +exit: + ret void +} + define void @loop_contains_store_to_pointer_with_no_deref_info(ptr align 2 dereferenceable(40) readonly %load.array, ptr align 2 noalias %array, ptr align 2 dereferenceable(40) readonly %pred) { ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_to_pointer_with_no_deref_info' ; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll index 0fe893abec86c..e79995f673fb4 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll @@ -94,3 +94,50 @@ loop.end: %retval = phi i64 [ %index, %loop ], [ -1, %loop.inc ] ret i64 %retval } + +define i64 @early_exit_alignment_and_deref_known_via_assumption(ptr noalias %p1, ptr noalias %p2, i64 %n) nofree nosync { +; CHECK-LABEL: define i64 @early_exit_alignment_and_deref_known_via_assumption( +; CHECK-SAME: ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P1]], i64 4), "dereferenceable"(ptr [[P1]], i64 [[N]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 4), "dereferenceable"(ptr [[P2]], i64 [[N]]) ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ -1, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + call void @llvm.assume(i1 true) [ "align"(ptr %p1, i64 4), "dereferenceable"(ptr %p1, i64 %n) ] + call void @llvm.assume(i1 true) [ "align"(ptr %p2, i64 4), "dereferenceable"(ptr %p2, i64 %n) ] + br 
label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, %n + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ -1, %loop.inc ] + ret i64 %retval +} diff --git a/llvm/test/Verifier/assume-bundles.ll b/llvm/test/Verifier/assume-bundles.ll index 047e8d9bba3ed..d8037b965edb5 100644 --- a/llvm/test/Verifier/assume-bundles.ll +++ b/llvm/test/Verifier/assume-bundles.ll @@ -7,13 +7,13 @@ define void @func(ptr %P, i32 %P1, ptr %P2, ptr %P3) { ; CHECK: tags must be valid attribute names ; CHECK: "adazdazd" call void @llvm.assume(i1 true) ["adazdazd"()] -; CHECK: the second argument should be a constant integral value +; CHECK-NOT: call{{.+}}deref call void @llvm.assume(i1 true) ["dereferenceable"(ptr %P, i32 %P1)] -; CHECK: the second argument should be a constant integral value +; CHECK: second argument should be an integer call void @llvm.assume(i1 true) ["dereferenceable"(ptr %P, float 1.5)] -; CHECK: too many arguments +; CHECK: dereferenceable assumptions should have 2 arguments call void @llvm.assume(i1 true) ["dereferenceable"(ptr %P, i32 8, i32 8)] -; CHECK: this attribute should have 2 arguments +; CHECK: dereferenceable assumptions should have 2 arguments call void @llvm.assume(i1 true) ["dereferenceable"(ptr %P)] ; CHECK: this attribute has no argument call void @llvm.assume(i1 true) ["dereferenceable"(ptr %P, i32 4), "cold"(ptr %P)] @@ -30,7 +30,7 @@ define void @func(ptr %P, i32 %P1, ptr %P2, ptr %P3) { call void @llvm.assume(i1 true) ["separate_storage"(ptr %P)] ; CHECK: arguments to separate_storage assumptions should be pointers call void @llvm.assume(i1 true) ["separate_storage"(ptr %P, i32 123)] -; CHECK: this attribute should have 2 arguments +; CHECK: dereferenceable assumptions should have 2 arguments call void @llvm.assume(i1 true) ["align"(ptr %P, i32 4), "dereferenceable"(ptr %P)] ret void }
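
Note (editorial, appended after the diff; not part of the patch): the new RetainedKnowledge::IRArgValue field carries the size operand of a "dereferenceable" assumption as an IR Value, so clients can reason symbolically about non-constant sizes such as "dereferenceable"(ptr %a, i64 %n) via SCEV, as isDereferenceableAndAlignedInLoop now does. The sketch below shows one way a client could consume the field; the helper name findAssumedDerefSizeSCEV and its parameter list are illustrative assumptions, not APIs added by this patch.

#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"

using namespace llvm;

// Hypothetical helper: return the assumed dereferenceable size of Ptr as a
// SCEV, or nullptr if no assumption valid at CtxI provides one. IRArgValue is
// populated for both constant and variable size operands, whereas ArgValue is
// only meaningful when the operand is a ConstantInt.
static const SCEV *findAssumedDerefSizeSCEV(const Value *Ptr,
                                            AssumptionCache &AC,
                                            ScalarEvolution &SE,
                                            const Instruction *CtxI,
                                            const DominatorTree *DT) {
  const SCEV *SizeSCEV = nullptr;
  getKnowledgeForValue(Ptr, {Attribute::Dereferenceable}, AC,
                       [&](RetainedKnowledge RK, Instruction *Assume, auto) {
                         if (!isValidAssumeForContext(Assume, CtxI, DT) ||
                             !RK.IRArgValue)
                           return false; // Keep scanning other assumes.
                         // The size operand is an integer, so it is SCEVable.
                         SizeSCEV = SE.getSCEV(RK.IRArgValue);
                         return true; // First valid assumption wins.
                       });
  return SizeSCEV;
}

As in the patch itself, any conclusion drawn from the returned size is only sound if the pointer cannot be freed between the assumption and the use (compare the Ptr->canBeFreed() bail-out in isDereferenceableAndAlignedPointerViaAssumption above).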