diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopIdiomVectorize.h b/llvm/include/llvm/Transforms/Vectorize/LoopIdiomVectorize.h
index 56f44b7dc6b2a..ef6e0e0687809 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopIdiomVectorize.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopIdiomVectorize.h
@@ -13,7 +13,24 @@
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
 
 namespace llvm {
-struct LoopIdiomVectorizePass : PassInfoMixin<LoopIdiomVectorizePass> {
+/// The flavour of vector intrinsics emitted when vectorizing an idiom:
+/// masked vector intrinsics (Masked) or VP intrinsics (Predicated).
+enum class LoopIdiomVectorizeStyle { Masked, Predicated };
+
+class LoopIdiomVectorizePass : public PassInfoMixin<LoopIdiomVectorizePass> {
+  LoopIdiomVectorizeStyle VectorizeStyle = LoopIdiomVectorizeStyle::Masked;
+
+  // The VF used in vectorizing the byte compare pattern.
+  unsigned ByteCompareVF = 16;
+
+public:
+  LoopIdiomVectorizePass() = default;
+  explicit LoopIdiomVectorizePass(LoopIdiomVectorizeStyle S)
+      : VectorizeStyle(S) {}
+
+  LoopIdiomVectorizePass(LoopIdiomVectorizeStyle S, unsigned BCVF)
+      : VectorizeStyle(S), ByteCompareVF(BCVF) {}
+
   PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
                         LoopStandardAnalysisResults &AR, LPMUpdater &U);
 };
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index f76aef742290c..c132a6ef9611c 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -33,10 +33,12 @@
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/MC/TargetRegistry.h"
+#include "llvm/Passes/PassBuilder.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Vectorize/LoopIdiomVectorize.h"
 #include <optional>
 using namespace llvm;
 
@@ -572,6 +574,13 @@ void RISCVPassConfig::addPostRegAlloc() {
     addPass(createRISCVRedundantCopyEliminationPass());
 }
 
+void RISCVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
+  PB.registerLateLoopOptimizationsEPCallback([=](LoopPassManager &LPM,
+                                                 OptimizationLevel Level) {
+    LPM.addPass(LoopIdiomVectorizePass(LoopIdiomVectorizeStyle::Predicated));
+  });
+}
+
 yaml::MachineFunctionInfo *
 RISCVTargetMachine::createDefaultFuncInfoYAML() const {
   return new yaml::RISCVMachineFunctionInfo();
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.h b/llvm/lib/Target/RISCV/RISCVTargetMachine.h
index 68dfb3c81f2fe..ce7b7907e1f3a 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.h
@@ -59,6 +59,7 @@ class RISCVTargetMachine : public LLVMTargetMachine {
                                 PerFunctionMIParsingState &PFS,
                                 SMDiagnostic &Error,
                                 SMRange &SourceRange) const override;
+  void registerPassBuilderCallbacks(PassBuilder &PB) override;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index c4d10aada1f4c..9c37a4f6ec2d0 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -397,6 +397,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
   bool shouldFoldTerminatingConditionAfterLSR() const {
     return true;
   }
+
+  std::optional<unsigned> getMinPageSize() const { return 4096; }
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
index c7a8700e14531..64e04cae2773f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
@@ -59,19 +59,34 @@ static cl::opt<bool> DisableAll("disable-loop-idiom-vectorize-all", cl::Hidden,
                                 cl::init(false),
                                 cl::desc("Disable Loop Idiom Vectorize Pass."));
 
+static cl::opt<LoopIdiomVectorizeStyle>
+    LITVecStyle("loop-idiom-vectorize-style", cl::Hidden,
+                cl::desc("The vectorization style for loop idiom transform."),
+                cl::values(clEnumValN(LoopIdiomVectorizeStyle::Masked, "masked",
+                                      "Use masked vector intrinsics"),
+                           clEnumValN(LoopIdiomVectorizeStyle::Predicated,
+                                      "predicated", "Use VP intrinsics")),
+                cl::init(LoopIdiomVectorizeStyle::Masked));
+
 static cl::opt<bool>
     DisableByteCmp("disable-loop-idiom-vectorize-bytecmp", cl::Hidden,
                    cl::init(false),
                    cl::desc("Proceed with Loop Idiom Vectorize Pass, but do "
                             "not convert byte-compare loop(s)."));
 
+static cl::opt<unsigned>
+    ByteCmpVF("loop-idiom-vectorize-bytecmp-vf", cl::Hidden,
+              cl::desc("The vectorization factor for byte-compare patterns."),
+              cl::init(16));
+
 static cl::opt<bool>
     VerifyLoops("loop-idiom-vectorize-verify", cl::Hidden, cl::init(false),
                 cl::desc("Verify loops generated Loop Idiom Vectorize Pass."));
 
 namespace {
-
 class LoopIdiomVectorize {
+  LoopIdiomVectorizeStyle VectorizeStyle;
+  unsigned ByteCompareVF;
   Loop *CurLoop = nullptr;
   DominatorTree *DT;
   LoopInfo *LI;
@@ -86,10 +101,11 @@ class LoopIdiomVectorize {
   BasicBlock *VectorLoopIncBlock = nullptr;
 
 public:
-  explicit LoopIdiomVectorize(DominatorTree *DT, LoopInfo *LI,
-                              const TargetTransformInfo *TTI,
-                              const DataLayout *DL)
-      : DT(DT), LI(LI), TTI(TTI), DL(DL) {}
+  LoopIdiomVectorize(LoopIdiomVectorizeStyle S, unsigned VF, DominatorTree *DT,
+                     LoopInfo *LI, const TargetTransformInfo *TTI,
+                     const DataLayout *DL)
+      : VectorizeStyle(S), ByteCompareVF(VF), DT(DT), LI(LI), TTI(TTI), DL(DL) {
+  }
 
   bool run(Loop *L);
 
@@ -111,6 +127,10 @@ class LoopIdiomVectorize {
                                   GetElementPtrInst *GEPA,
                                   GetElementPtrInst *GEPB, Value *ExtStart,
                                   Value *ExtEnd);
+  Value *createPredicatedFindMismatch(IRBuilder<> &Builder, DomTreeUpdater &DTU,
+                                      GetElementPtrInst *GEPA,
+                                      GetElementPtrInst *GEPB, Value *ExtStart,
+                                      Value *ExtEnd);
 
   void transformByteCompare(GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,
                             PHINode *IndPhi, Value *MaxLen, Instruction *Index,
@@ -128,8 +148,16 @@ PreservedAnalyses LoopIdiomVectorizePass::run(Loop &L, LoopAnalysisManager &AM,
 
   const auto *DL = &L.getHeader()->getDataLayout();
 
-  LoopIdiomVectorize LIT(&AR.DT, &AR.LI, &AR.TTI, DL);
-  if (!LIT.run(&L))
+  LoopIdiomVectorizeStyle VecStyle = VectorizeStyle;
+  if (LITVecStyle.getNumOccurrences())
+    VecStyle = LITVecStyle;
+
+  unsigned BCVF = ByteCompareVF;
+  if (ByteCmpVF.getNumOccurrences())
+    BCVF = ByteCmpVF;
+
+  LoopIdiomVectorize LIV(VecStyle, BCVF, &AR.DT, &AR.LI, &AR.TTI, DL);
+  if (!LIV.run(&L))
     return PreservedAnalyses::all();
 
   return PreservedAnalyses::none();
@@ -354,20 +382,16 @@ Value *LoopIdiomVectorize::createMaskedFindMismatch(
   Value *PtrA = GEPA->getPointerOperand();
   Value *PtrB = GEPB->getPointerOperand();
 
-  // At this point we know two things must be true:
-  //  1. Start <= End
-  //  2. ExtMaxLen <= MinPageSize due to the page checks.
-  // Therefore, we know that we can use a 64-bit induction variable that
-  // starts from 0 -> ExtMaxLen and it will not overflow.
   ScalableVectorType *PredVTy =
-      ScalableVectorType::get(Builder.getInt1Ty(), 16);
+      ScalableVectorType::get(Builder.getInt1Ty(), ByteCompareVF);
 
   Value *InitialPred = Builder.CreateIntrinsic(
       Intrinsic::get_active_lane_mask, {PredVTy, I64Type}, {ExtStart, ExtEnd});
 
   Value *VecLen = Builder.CreateIntrinsic(Intrinsic::vscale, {I64Type}, {});
-  VecLen = Builder.CreateMul(VecLen, ConstantInt::get(I64Type, 16), "",
-                             /*HasNUW=*/true, /*HasNSW=*/true);
+  VecLen =
+      Builder.CreateMul(VecLen, ConstantInt::get(I64Type, ByteCompareVF), "",
+                        /*HasNUW=*/true, /*HasNSW=*/true);
 
   Value *PFalse = Builder.CreateVectorSplat(PredVTy->getElementCount(),
                                             Builder.getInt1(false));
@@ -385,7 +409,8 @@ Value *LoopIdiomVectorize::createMaskedFindMismatch(
   LoopPred->addIncoming(InitialPred, VectorLoopPreheaderBlock);
   PHINode *VectorIndexPhi = Builder.CreatePHI(I64Type, 2, "mismatch_vec_index");
   VectorIndexPhi->addIncoming(ExtStart, VectorLoopPreheaderBlock);
-  Type *VectorLoadType = ScalableVectorType::get(Builder.getInt8Ty(), 16);
+  Type *VectorLoadType =
+      ScalableVectorType::get(Builder.getInt8Ty(), ByteCompareVF);
   Value *Passthru = ConstantInt::getNullValue(VectorLoadType);
 
   Value *VectorLhsGep =
@@ -454,6 +479,109 @@
Value *LoopIdiomVectorize::createMaskedFindMismatch(
   return Builder.CreateTrunc(VectorLoopRes64, ResType);
 }
 
+Value *LoopIdiomVectorize::createPredicatedFindMismatch(
+    IRBuilder<> &Builder, DomTreeUpdater &DTU, GetElementPtrInst *GEPA,
+    GetElementPtrInst *GEPB, Value *ExtStart, Value *ExtEnd) {
+  Type *I64Type = Builder.getInt64Ty();
+  Type *I32Type = Builder.getInt32Ty();
+  Type *ResType = I32Type;
+  Type *LoadType = Builder.getInt8Ty();
+  Value *PtrA = GEPA->getPointerOperand();
+  Value *PtrB = GEPB->getPointerOperand();
+
+  auto *JumpToVectorLoop = BranchInst::Create(VectorLoopStartBlock);
+  Builder.Insert(JumpToVectorLoop);
+
+  DTU.applyUpdates({{DominatorTree::Insert, VectorLoopPreheaderBlock,
+                     VectorLoopStartBlock}});
+
+  // Set up the first Vector loop block by creating the index PHI, doing the
+  // VP loads and comparing the loaded vectors.
+  Builder.SetInsertPoint(VectorLoopStartBlock);
+  auto *VectorIndexPhi = Builder.CreatePHI(I64Type, 2, "mismatch_vector_index");
+  VectorIndexPhi->addIncoming(ExtStart, VectorLoopPreheaderBlock);
+
+  // Calculate AVL by subtracting the vector loop index from the trip count
+  Value *AVL = Builder.CreateSub(ExtEnd, VectorIndexPhi, "avl", /*HasNUW=*/true,
+                                 /*HasNSW=*/true);
+
+  auto *VectorLoadType = ScalableVectorType::get(LoadType, ByteCompareVF);
+  auto *VF = ConstantInt::get(I32Type, ByteCompareVF);
+
+  Value *VL = Builder.CreateIntrinsic(Intrinsic::experimental_get_vector_length,
+                                      {I64Type}, {AVL, VF, Builder.getTrue()});
+  Value *GepOffset = VectorIndexPhi;
+
+  Value *VectorLhsGep =
+      Builder.CreateGEP(LoadType, PtrA, GepOffset, "", GEPA->isInBounds());
+  VectorType *TrueMaskTy =
+      VectorType::get(Builder.getInt1Ty(), VectorLoadType->getElementCount());
+  Value *AllTrueMask = Constant::getAllOnesValue(TrueMaskTy);
+  Value *VectorLhsLoad = Builder.CreateIntrinsic(
+      Intrinsic::vp_load, {VectorLoadType, VectorLhsGep->getType()},
+      {VectorLhsGep, AllTrueMask, VL}, nullptr, "lhs.load");
+
+  Value *VectorRhsGep =
+      Builder.CreateGEP(LoadType, PtrB, GepOffset, "", GEPB->isInBounds());
+  Value *VectorRhsLoad = Builder.CreateIntrinsic(
+      Intrinsic::vp_load, {VectorLoadType, VectorRhsGep->getType()},
+      {VectorRhsGep, AllTrueMask, VL}, nullptr, "rhs.load");
+
+  StringRef PredicateStr = CmpInst::getPredicateName(CmpInst::ICMP_NE);
+  auto *PredicateMDS = MDString::get(VectorLhsLoad->getContext(), PredicateStr);
+  Value *Pred = MetadataAsValue::get(VectorLhsLoad->getContext(), PredicateMDS);
+  Value *VectorMatchCmp = Builder.CreateIntrinsic(
+      Intrinsic::vp_icmp, {VectorLhsLoad->getType()},
+      {VectorLhsLoad, VectorRhsLoad, Pred, AllTrueMask, VL}, nullptr,
+      "mismatch.cmp");
+  Value *CTZ = Builder.CreateIntrinsic(
+      Intrinsic::vp_cttz_elts, {ResType, VectorMatchCmp->getType()},
+      {VectorMatchCmp, /*ZeroIsPoison=*/Builder.getInt1(false), AllTrueMask,
+       VL});
+  Value *MismatchFound = Builder.CreateICmpNE(CTZ, VL);
+  auto *VectorEarlyExit = BranchInst::Create(VectorLoopMismatchBlock,
+                                             VectorLoopIncBlock, MismatchFound);
+  Builder.Insert(VectorEarlyExit);
+
+  DTU.applyUpdates(
+      {{DominatorTree::Insert, VectorLoopStartBlock, VectorLoopMismatchBlock},
+       {DominatorTree::Insert, VectorLoopStartBlock, VectorLoopIncBlock}});
+
+  // Increment the index counter by the number of elements just compared (VL).
+  // We branch back to the start of the loop unless the new index has reached
+  // ExtEnd, in which case no mismatch was found and we go to EndBlock.
+  Builder.SetInsertPoint(VectorLoopIncBlock);
+  Value *VL64 = Builder.CreateZExt(VL, I64Type);
+  Value *NewVectorIndexPhi =
+      Builder.CreateAdd(VectorIndexPhi, VL64, "",
+                        /*HasNUW=*/true, /*HasNSW=*/true);
+  VectorIndexPhi->addIncoming(NewVectorIndexPhi, VectorLoopIncBlock);
+  Value *ExitCond = Builder.CreateICmpNE(NewVectorIndexPhi, ExtEnd);
+  auto *VectorLoopBranchBack =
+      BranchInst::Create(VectorLoopStartBlock, EndBlock, ExitCond);
+  Builder.Insert(VectorLoopBranchBack);
+
+  DTU.applyUpdates(
+      {{DominatorTree::Insert, VectorLoopIncBlock, VectorLoopStartBlock},
+       {DominatorTree::Insert, VectorLoopIncBlock, EndBlock}});
+
+  // If we found a mismatch then we need to calculate which lane in the vector
+  // had a mismatch and add that on to the current loop index.
+  Builder.SetInsertPoint(VectorLoopMismatchBlock);
+
+  // Add LCSSA phis for CTZ and VectorIndexPhi.
+  auto *CTZLCSSAPhi = Builder.CreatePHI(CTZ->getType(), 1, "ctz");
+  CTZLCSSAPhi->addIncoming(CTZ, VectorLoopStartBlock);
+  auto *VectorIndexLCSSAPhi =
+      Builder.CreatePHI(VectorIndexPhi->getType(), 1, "mismatch_vector_index");
+  VectorIndexLCSSAPhi->addIncoming(VectorIndexPhi, VectorLoopStartBlock);
+
+  Value *CTZI64 = Builder.CreateZExt(CTZLCSSAPhi, I64Type);
+  Value *VectorLoopRes64 = Builder.CreateAdd(VectorIndexLCSSAPhi, CTZI64, "",
+                                             /*HasNUW=*/true, /*HasNSW=*/true);
+  return Builder.CreateTrunc(VectorLoopRes64, ResType);
+}
+
 Value *LoopIdiomVectorize::expandFindMismatch(
     IRBuilder<> &Builder, DomTreeUpdater &DTU, GetElementPtrInst *GEPA,
     GetElementPtrInst *GEPB, Instruction *Index, Value *Start, Value *MaxLen) {
@@ -613,8 +741,22 @@ Value *LoopIdiomVectorize::expandFindMismatch(
   // processed in each iteration, etc.
   Builder.SetInsertPoint(VectorLoopPreheaderBlock);
 
-  Value *VectorLoopRes =
-      createMaskedFindMismatch(Builder, DTU, GEPA, GEPB, ExtStart, ExtEnd);
+  // At this point we know two things must be true:
+  //  1. Start <= End
+  //  2. ExtMaxLen <= MinPageSize due to the page checks.
+  // Therefore, we know that we can use a 64-bit induction variable that
+  // starts from 0 -> ExtMaxLen and it will not overflow.
+  Value *VectorLoopRes = nullptr;
+  switch (VectorizeStyle) {
+  case LoopIdiomVectorizeStyle::Masked:
+    VectorLoopRes =
+        createMaskedFindMismatch(Builder, DTU, GEPA, GEPB, ExtStart, ExtEnd);
+    break;
+  case LoopIdiomVectorizeStyle::Predicated:
+    VectorLoopRes = createPredicatedFindMismatch(Builder, DTU, GEPA, GEPB,
+                                                 ExtStart, ExtEnd);
+    break;
+  }
 
   Builder.Insert(BranchInst::Create(EndBlock));
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfirst-byte-compare-index.ll b/llvm/test/CodeGen/RISCV/rvv/vfirst-byte-compare-index.ll
new file mode 100644
index 0000000000000..3107d4e044cae
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfirst-byte-compare-index.ll
@@ -0,0 +1,177 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mattr=+v < %s | FileCheck %s
+
+; Testing VFIRST patterns related to llvm/test/Transforms/LoopIdiom/RISCV/byte-compare-index.ll
+
+define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 signext %len, i32 signext %n) {
+; CHECK-LABEL: compare_bytes_simple:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addiw a4, a2, 1
+; CHECK-NEXT: bltu a3, a4, .LBB0_7
+; CHECK-NEXT: # %bb.1: # %mismatch_mem_check
+; CHECK-NEXT: slli a2, a4, 32
+; CHECK-NEXT: srli a2, a2, 32
+; CHECK-NEXT: slli a5, a3, 32
+; CHECK-NEXT: srli a5, a5, 32
+; CHECK-NEXT: add a6, a0, a2
+; CHECK-NEXT: add a7, a0, a5
+; CHECK-NEXT: srli a6, a6, 12
+; CHECK-NEXT: srli a7, a7, 12
+; CHECK-NEXT: bne a6, a7, .LBB0_7
+; CHECK-NEXT: # %bb.2: # %mismatch_mem_check
+; CHECK-NEXT: add a6, a1, a2
+; CHECK-NEXT: add a7, a1, a5
+; CHECK-NEXT: srli a6, a6, 12
+; CHECK-NEXT: srli a7, a7, 12
+; CHECK-NEXT: bne a6, a7, .LBB0_7
+; CHECK-NEXT: .LBB0_3: # %mismatch_vec_loop
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: sub a4, a5, a2
+; CHECK-NEXT: vsetvli a4, a4, e8, m2, ta, ma
+; CHECK-NEXT: add a6, a0, a2
+; CHECK-NEXT: vle8.v v8, (a6)
+; CHECK-NEXT: add a6, a1, a2
+; CHECK-NEXT: vle8.v v10, (a6)
+; CHECK-NEXT: vmsne.vv v12, v8, v10
+; CHECK-NEXT: vfirst.m a7, v12
+; CHECK-NEXT: mv a6, a4
+; CHECK-NEXT: bltz a7, .LBB0_5
+; CHECK-NEXT: # %bb.4: # %mismatch_vec_loop
+; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: mv a6, a7
+; CHECK-NEXT: .LBB0_5: # %mismatch_vec_loop
+; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: sext.w a7, a6
+; CHECK-NEXT: bne a7, a4, .LBB0_11
+; CHECK-NEXT: # %bb.6: # %mismatch_vec_loop_inc
+; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: add a2, a2, a4
+; CHECK-NEXT: bne a2, a5, .LBB0_3
+; CHECK-NEXT: j .LBB0_9
+; CHECK-NEXT: .LBB0_7: # %mismatch_loop
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a2, a4, 32
+; CHECK-NEXT: srli a2, a2, 32
+; CHECK-NEXT: add a5, a0, a2
+; CHECK-NEXT: lbu a5, 0(a5)
+; CHECK-NEXT: add a2, a1, a2
+; CHECK-NEXT: lbu a2, 0(a2)
+; CHECK-NEXT: bne a5, a2, .LBB0_10
+; CHECK-NEXT: # %bb.8: # %mismatch_loop_inc
+; CHECK-NEXT: # in Loop: Header=BB0_7 Depth=1
+; CHECK-NEXT: addiw a4, a4, 1
+; CHECK-NEXT: bne a3, a4, .LBB0_7
+; CHECK-NEXT: .LBB0_9: # %while.end
+; CHECK-NEXT: mv a0, a3
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_10:
+; CHECK-NEXT: mv a0, a4
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_11: # %mismatch_vec_loop_found
+; CHECK-NEXT: slli a6, a6, 32
+; CHECK-NEXT: srli a3, a6, 32
+; CHECK-NEXT: add a0, a2, a3
+; CHECK-NEXT: ret
+entry:
+  %0 = add i32 %len, 1
+  br label %mismatch_min_it_check
+
+mismatch_min_it_check: ; preds = %entry
+  %1 = zext i32 %0 to i64
+  %2 = zext i32 %n to i64
+  %3 = icmp ule i32 %0, %n
+  br i1 %3, label %mismatch_mem_check, label %mismatch_loop_pre
+
+mismatch_mem_check: ; preds = %mismatch_min_it_check
+  %4 = getelementptr i8, ptr %a, i64 %1
+  %5 = getelementptr i8, ptr %b, i64 %1
+  %6 = ptrtoint ptr %5 to i64
+  %7 = ptrtoint ptr %4 to i64
+  %8 = getelementptr i8, ptr %a, i64 %2
+  %9 = getelementptr i8, ptr %b, i64 %2
+  %10 = ptrtoint ptr %8 to i64
+  %11 = ptrtoint ptr %9 to i64
+  %12 = lshr i64 %7, 12
+  %13 = lshr i64 %10, 12
+  %14 = lshr i64 %6, 12
+  %15 = lshr i64 %11, 12
+  %16 = icmp ne i64 %12, %13
+  %17 = icmp ne i64 %14, %15
+  %18 = or i1 %16, %17
+  br i1 %18, label %mismatch_loop_pre, label %mismatch_vec_loop_preheader
+
+mismatch_vec_loop_preheader: ; preds = %mismatch_mem_check
+  br label %mismatch_vec_loop
+
+mismatch_vec_loop: ; preds = %mismatch_vec_loop_inc, %mismatch_vec_loop_preheader
+  %mismatch_vector_index = phi i64 [ %1, %mismatch_vec_loop_preheader ], [ %25, %mismatch_vec_loop_inc ]
+  %avl = sub nuw nsw i64 %2, %mismatch_vector_index
+  %19 = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 16, i1 true)
+  %20 = getelementptr inbounds i8, ptr %a, i64 %mismatch_vector_index
+  %lhs.load = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %20, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %19)
+  %21 = getelementptr inbounds i8, ptr %b, i64 %mismatch_vector_index
+  %rhs.load = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %21, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %19)
+  %mismatch.cmp = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> %lhs.load, <vscale x 16 x i8> %rhs.load, metadata !"ne", <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %19)
+  %22 = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %mismatch.cmp, i1 false, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %19)
+  %23 = icmp ne i32 %22, %19
+  br i1 %23, label %mismatch_vec_loop_found, label %mismatch_vec_loop_inc
+
+mismatch_vec_loop_inc: ; preds = %mismatch_vec_loop
+  %24 = zext i32 %19 to i64
+  %25 = add nuw nsw i64 %mismatch_vector_index, %24
+  %26 = icmp ne i64 %25, %2
+  br i1 %26, label %mismatch_vec_loop, label %mismatch_end
+
+mismatch_vec_loop_found: ; preds = %mismatch_vec_loop
+  %ctz = phi i32 [ %22, %mismatch_vec_loop ]
+  %mismatch_vector_index1 = phi i64 [ %mismatch_vector_index, %mismatch_vec_loop ]
+  %27 = zext i32 %ctz to i64
+  %28 = add nuw nsw i64 %mismatch_vector_index1, %27
+  %29 = trunc i64 %28 to i32
+  br label %mismatch_end
+
+mismatch_loop_pre: ; preds = %mismatch_mem_check, %mismatch_min_it_check
+  br label %mismatch_loop
+
+mismatch_loop: ; preds = %mismatch_loop_inc, %mismatch_loop_pre
+  %mismatch_index = phi i32 [ %0, %mismatch_loop_pre ], [ %36, %mismatch_loop_inc ]
+  %30 = zext i32 %mismatch_index to i64
+  %31 = getelementptr inbounds i8, ptr %a, i64 %30
+  %32 = load i8, ptr %31, align 1
+  %33 = getelementptr inbounds i8, ptr %b, i64 %30
+  %34 = load i8, ptr %33, align 1
+  %35 = icmp eq i8 %32, %34
+  br i1 %35, label %mismatch_loop_inc, label %mismatch_end
+
+mismatch_loop_inc: ; preds = %mismatch_loop
+  %36 = add i32 %mismatch_index, 1
+  %37 = icmp eq i32 %36, %n
+  br i1 %37, label %mismatch_end, label %mismatch_loop
+
+mismatch_end: ; preds = %mismatch_loop_inc, %mismatch_loop, %mismatch_vec_loop_found, %mismatch_vec_loop_inc
+  %mismatch_result = phi i32 [ %n, %mismatch_loop_inc ], [ %mismatch_index, %mismatch_loop ], [ %n, %mismatch_vec_loop_inc ], [ %29, %mismatch_vec_loop_found ]
+  br i1 true, label %byte.compare, label %while.cond
+
+while.cond: ; preds = %mismatch_end, %while.body
+  %len.addr = phi i32 [ %len, %mismatch_end ], [ %mismatch_result, %while.body ]
+  %inc = add i32 %len.addr, 1
+  %cmp.not = icmp eq i32 %mismatch_result, %n
+  br i1 %cmp.not, label %while.end, label %while.body
+
+while.body: ; preds = %while.cond
+  %idxprom = zext i32 %mismatch_result to i64
+  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom
+  %38 = load i8, ptr %arrayidx, align 1
+  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom
+  %39 = load i8, ptr %arrayidx2, align 1
+  %cmp.not2 = icmp eq i8 %38, %39
+  br i1 %cmp.not2, label %while.cond, label %while.end
+
+byte.compare: ; preds = %mismatch_end
+  br label %while.end
+
+while.end: ; preds = %byte.compare, %while.body, %while.cond
+  %inc.lcssa = phi i32 [ %mismatch_result, %while.body ], [ %mismatch_result, %while.cond ], [ %mismatch_result, %byte.compare ]
+  ret i32 %inc.lcssa
+}
+
diff --git a/llvm/test/Transforms/LoopIdiom/RISCV/byte-compare-index.ll b/llvm/test/Transforms/LoopIdiom/RISCV/byte-compare-index.ll
new file mode 100644
index 0000000000000..8cf761055bd38
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/RISCV/byte-compare-index.ll
@@ -0,0 +1,2309 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -passes=loop-idiom-vectorize -mtriple=riscv64-unknown-linux-gnu -loop-idiom-vectorize-style=predicated -mattr=+v -S < %s | FileCheck %s
+; RUN: opt -passes=loop-idiom-vectorize -mtriple=riscv64-unknown-linux-gnu -loop-idiom-vectorize-style=predicated -loop-idiom-vectorize-bytecmp-vf=64 -mattr=+v -S < %s | FileCheck %s --check-prefix=LMUL8
+; RUN: opt -passes='loop(loop-idiom-vectorize),simplifycfg' -mtriple=riscv64-unknown-linux-gnu -loop-idiom-vectorize-style=predicated -mattr=+v -S < %s | FileCheck %s --check-prefix=LOOP-DEL
+; RUN: opt -passes=loop-idiom-vectorize -mtriple=riscv64-unknown-linux-gnu -loop-idiom-vectorize-style=masked -mattr=+v -S < %s | FileCheck %s --check-prefix=MASKED
+
+define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 %len, i32 %n) {
+; CHECK-LABEL: define i32 @compare_bytes_simple(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1
+; CHECK-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]]
+; CHECK: mismatch_min_it_check:
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]]
+; CHECK-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK: mismatch_mem_check:
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr 
[[A]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP7]], 12 +; CHECK-NEXT: [[TMP9:%.*]] = lshr i64 [[TMP11]], 12 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP8]], 12 +; CHECK-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP14]], 12 +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP_PREHEADER:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK: mismatch_vec_loop_preheader: +; CHECK-NEXT: br label [[MISMATCH_VECTOR_LOOP:%.*]] +; CHECK: mismatch_vec_loop: +; CHECK-NEXT: [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VECTOR_LOOP_PREHEADER]] ], [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ] +; CHECK-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] +; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] +; CHECK-NEXT: [[LHS_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] +; CHECK-NEXT: [[RHS_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, 
zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[MISMATCH_CMP:%.*]] = call @llvm.vp.icmp.nxv16i8( [[LHS_LOAD]], [[RHS_LOAD]], metadata !"ne", shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1( [[MISMATCH_CMP]], i1 false, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] +; CHECK-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] +; CHECK: mismatch_vec_loop_inc: +; CHECK-NEXT: [[TMP23:%.*]] = zext i32 [[TMP19]] to i64 +; CHECK-NEXT: [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[MISMATCH_END:%.*]] +; CHECK: mismatch_vec_loop_found: +; CHECK-NEXT: [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ] +; CHECK-NEXT: [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ] +; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[FIRST1]] to i64 +; CHECK-NEXT: [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +; CHECK-NEXT: br label [[MISMATCH_END]] +; CHECK: mismatch_loop_pre: +; CHECK-NEXT: br label [[MISMATCH_LOOP:%.*]] +; CHECK: mismatch_loop: +; CHECK-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ] +; CHECK-NEXT: [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]] +; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1 +; 
CHECK-NEXT: [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]] +; CHECK-NEXT: br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]] +; CHECK: mismatch_loop_inc: +; CHECK-NEXT: [[TMP35]] = add i32 [[MISMATCH_INDEX]], 1 +; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]] +; CHECK-NEXT: br i1 [[TMP36]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]] +; CHECK: mismatch_end: +; CHECK-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ] +; CHECK-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]] +; CHECK: while.cond: +; CHECK-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ] +; CHECK-NEXT: [[INC:%.*]] = add i32 [[LEN_ADDR]], 1 +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.body: +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP37]], [[TMP38]] +; CHECK-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; CHECK: byte.compare: +; CHECK-NEXT: br label [[WHILE_END]] +; CHECK: while.end: +; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ] +; CHECK-NEXT: ret i32 [[INC_LCSSA]] +; +; LMUL8-LABEL: define i32 @compare_bytes_simple( +; LMUL8-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) 
#[[ATTR0:[0-9]+]] { +; LMUL8-NEXT: entry: +; LMUL8-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1 +; LMUL8-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]] +; LMUL8: mismatch_min_it_check: +; LMUL8-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; LMUL8-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64 +; LMUL8-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]] +; LMUL8-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0:![0-9]+]] +; LMUL8: mismatch_mem_check: +; LMUL8-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]] +; LMUL8-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; LMUL8-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; LMUL8-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64 +; LMUL8-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] +; LMUL8-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; LMUL8-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 +; LMUL8-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; LMUL8-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP7]], 12 +; LMUL8-NEXT: [[TMP9:%.*]] = lshr i64 [[TMP11]], 12 +; LMUL8-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP8]], 12 +; LMUL8-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP14]], 12 +; LMUL8-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]] +; LMUL8-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]] +; LMUL8-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] +; LMUL8-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP_PREHEADER:%.*]], !prof [[PROF1:![0-9]+]] +; LMUL8: mismatch_vec_loop_preheader: +; LMUL8-NEXT: br label [[MISMATCH_VECTOR_LOOP:%.*]] +; LMUL8: mismatch_vec_loop: +; LMUL8-NEXT: [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VECTOR_LOOP_PREHEADER]] ], [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ] +; LMUL8-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] +; LMUL8-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 
[[AVL]], i32 64, i1 true) +; LMUL8-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] +; LMUL8-NEXT: [[LHS_LOAD:%.*]] = call @llvm.vp.load.nxv64i8.p0(ptr [[TMP20]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] +; LMUL8-NEXT: [[RHS_LOAD:%.*]] = call @llvm.vp.load.nxv64i8.p0(ptr [[TMP21]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[MISMATCH_CMP:%.*]] = call @llvm.vp.icmp.nxv64i8( [[LHS_LOAD]], [[RHS_LOAD]], metadata !"ne", shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv64i1( [[MISMATCH_CMP]], i1 false, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] +; LMUL8-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] +; LMUL8: mismatch_vec_loop_inc: +; LMUL8-NEXT: [[TMP23:%.*]] = zext i32 [[TMP19]] to i64 +; LMUL8-NEXT: [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]] +; LMUL8-NEXT: [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]] +; LMUL8-NEXT: br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[MISMATCH_END:%.*]] +; LMUL8: mismatch_vec_loop_found: +; LMUL8-NEXT: [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ] +; LMUL8-NEXT: [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ] +; LMUL8-NEXT: [[TMP26:%.*]] = zext i32 [[FIRST1]] to i64 +; LMUL8-NEXT: [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP26]] +; LMUL8-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +; LMUL8-NEXT: br label [[MISMATCH_END]] +; LMUL8: mismatch_loop_pre: +; LMUL8-NEXT: br 
label [[MISMATCH_LOOP:%.*]] +; LMUL8: mismatch_loop: +; LMUL8-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ] +; LMUL8-NEXT: [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64 +; LMUL8-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]] +; LMUL8-NEXT: [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1 +; LMUL8-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]] +; LMUL8-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1 +; LMUL8-NEXT: [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]] +; LMUL8-NEXT: br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]] +; LMUL8: mismatch_loop_inc: +; LMUL8-NEXT: [[TMP35]] = add i32 [[MISMATCH_INDEX]], 1 +; LMUL8-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]] +; LMUL8-NEXT: br i1 [[TMP36]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]] +; LMUL8: mismatch_end: +; LMUL8-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ] +; LMUL8-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]] +; LMUL8: while.cond: +; LMUL8-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ] +; LMUL8-NEXT: [[INC:%.*]] = add i32 [[LEN_ADDR]], 1 +; LMUL8-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]] +; LMUL8-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; LMUL8: while.body: +; LMUL8-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64 +; LMUL8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; LMUL8-NEXT: [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; LMUL8-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; LMUL8-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; LMUL8-NEXT: 
[[CMP_NOT2:%.*]] = icmp eq i8 [[TMP37]], [[TMP38]] +; LMUL8-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; LMUL8: byte.compare: +; LMUL8-NEXT: br label [[WHILE_END]] +; LMUL8: while.end: +; LMUL8-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ] +; LMUL8-NEXT: ret i32 [[INC_LCSSA]] +; +; LOOP-DEL-LABEL: define i32 @compare_bytes_simple( +; LOOP-DEL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; LOOP-DEL-NEXT: entry: +; LOOP-DEL-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1 +; LOOP-DEL-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; LOOP-DEL-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64 +; LOOP-DEL-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]] +; LOOP-DEL-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0:![0-9]+]] +; LOOP-DEL: mismatch_mem_check: +; LOOP-DEL-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]] +; LOOP-DEL-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; LOOP-DEL-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; LOOP-DEL-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64 +; LOOP-DEL-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] +; LOOP-DEL-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; LOOP-DEL-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 +; LOOP-DEL-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; LOOP-DEL-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP7]], 12 +; LOOP-DEL-NEXT: [[TMP9:%.*]] = lshr i64 [[TMP11]], 12 +; LOOP-DEL-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP8]], 12 +; LOOP-DEL-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP14]], 12 +; LOOP-DEL-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]] +; LOOP-DEL-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]] +; LOOP-DEL-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] +; LOOP-DEL-NEXT: br i1 [[TMP18]], label 
[[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP:%.*]], !prof [[PROF1:![0-9]+]] +; LOOP-DEL: mismatch_vec_loop: +; LOOP-DEL-NEXT: [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ], [ [[TMP1]], [[MISMATCH_MEM_CHECK]] ] +; LOOP-DEL-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] +; LOOP-DEL-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) +; LOOP-DEL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] +; LOOP-DEL-NEXT: [[LHS_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] +; LOOP-DEL-NEXT: [[RHS_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[MISMATCH_CMP:%.*]] = call @llvm.vp.icmp.nxv16i8( [[LHS_LOAD]], [[RHS_LOAD]], metadata !"ne", shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1( [[MISMATCH_CMP]], i1 false, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] +; LOOP-DEL-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] +; LOOP-DEL: mismatch_vec_loop_inc: +; LOOP-DEL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP19]] to i64 +; LOOP-DEL-NEXT: [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]] +; LOOP-DEL-NEXT: [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]] +; LOOP-DEL-NEXT: br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[WHILE_END:%.*]] +; LOOP-DEL: mismatch_vec_loop_found: +; 
LOOP-DEL-NEXT: [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ] +; LOOP-DEL-NEXT: [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ] +; LOOP-DEL-NEXT: [[TMP26:%.*]] = zext i32 [[FIRST1]] to i64 +; LOOP-DEL-NEXT: [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP26]] +; LOOP-DEL-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +; LOOP-DEL-NEXT: br label [[WHILE_END]] +; LOOP-DEL: mismatch_loop_pre: +; LOOP-DEL-NEXT: br label [[MISMATCH_LOOP:%.*]] +; LOOP-DEL: mismatch_loop: +; LOOP-DEL-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ] +; LOOP-DEL-NEXT: [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64 +; LOOP-DEL-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]] +; LOOP-DEL-NEXT: [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1 +; LOOP-DEL-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]] +; LOOP-DEL-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1 +; LOOP-DEL-NEXT: [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]] +; LOOP-DEL-NEXT: br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[WHILE_END]] +; LOOP-DEL: mismatch_loop_inc: +; LOOP-DEL-NEXT: [[TMP35]] = add i32 [[MISMATCH_INDEX]], 1 +; LOOP-DEL-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]] +; LOOP-DEL-NEXT: br i1 [[TMP36]], label [[WHILE_END]], label [[MISMATCH_LOOP]] +; LOOP-DEL: while.end: +; LOOP-DEL-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ] +; LOOP-DEL-NEXT: ret i32 [[MISMATCH_RESULT]] +; +; MASKED-LABEL: define i32 @compare_bytes_simple( +; MASKED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; MASKED-NEXT: entry: +; MASKED-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1 +; MASKED-NEXT: br label 
[[MISMATCH_MIN_IT_CHECK:%.*]] +; MASKED: mismatch_min_it_check: +; MASKED-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; MASKED-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64 +; MASKED-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]] +; MASKED-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0:![0-9]+]] +; MASKED: mismatch_mem_check: +; MASKED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]] +; MASKED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; MASKED-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; MASKED-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64 +; MASKED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] +; MASKED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; MASKED-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; MASKED-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; MASKED-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP7]], 12 +; MASKED-NEXT: [[TMP13:%.*]] = lshr i64 [[TMP10]], 12 +; MASKED-NEXT: [[TMP14:%.*]] = lshr i64 [[TMP6]], 12 +; MASKED-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP11]], 12 +; MASKED-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]] +; MASKED-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]] +; MASKED-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] +; MASKED-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VEC_LOOP_PREHEADER:%.*]], !prof [[PROF1:![0-9]+]] +; MASKED: mismatch_vec_loop_preheader: +; MASKED-NEXT: [[TMP19:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]]) +; MASKED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; MASKED-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16 +; MASKED-NEXT: br label [[MISMATCH_VEC_LOOP:%.*]] +; MASKED: mismatch_vec_loop: +; MASKED-NEXT: [[MISMATCH_VEC_LOOP_PRED:%.*]] = phi [ [[TMP19]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_VEC_LOOP_INC:%.*]] ] +; MASKED-NEXT: [[MISMATCH_VEC_INDEX:%.*]] = 
phi i64 [ [[TMP1]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_VEC_LOOP_INC]] ] +; MASKED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VEC_INDEX]] +; MASKED-NEXT: [[TMP23:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, [[MISMATCH_VEC_LOOP_PRED]], zeroinitializer) +; MASKED-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VEC_INDEX]] +; MASKED-NEXT: [[TMP25:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, [[MISMATCH_VEC_LOOP_PRED]], zeroinitializer) +; MASKED-NEXT: [[TMP26:%.*]] = icmp ne [[TMP23]], [[TMP25]] +; MASKED-NEXT: [[TMP27:%.*]] = select [[MISMATCH_VEC_LOOP_PRED]], [[TMP26]], zeroinitializer +; MASKED-NEXT: [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP27]]) +; MASKED-NEXT: br i1 [[TMP28]], label [[MISMATCH_VEC_LOOP_FOUND:%.*]], label [[MISMATCH_VEC_LOOP_INC]] +; MASKED: mismatch_vec_loop_inc: +; MASKED-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_VEC_INDEX]], [[TMP21]] +; MASKED-NEXT: [[TMP30]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]]) +; MASKED-NEXT: [[TMP31:%.*]] = extractelement [[TMP30]], i64 0 +; MASKED-NEXT: br i1 [[TMP31]], label [[MISMATCH_VEC_LOOP]], label [[MISMATCH_END:%.*]] +; MASKED: mismatch_vec_loop_found: +; MASKED-NEXT: [[MISMATCH_VEC_FOUND_PRED:%.*]] = phi [ [[TMP27]], [[MISMATCH_VEC_LOOP]] ] +; MASKED-NEXT: [[MISMATCH_VEC_LAST_LOOP_PRED:%.*]] = phi [ [[MISMATCH_VEC_LOOP_PRED]], [[MISMATCH_VEC_LOOP]] ] +; MASKED-NEXT: [[MISMATCH_VEC_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_VEC_INDEX]], [[MISMATCH_VEC_LOOP]] ] +; MASKED-NEXT: [[TMP32:%.*]] = and [[MISMATCH_VEC_LAST_LOOP_PRED]], [[MISMATCH_VEC_FOUND_PRED]] +; MASKED-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1( [[TMP32]], i1 true) +; MASKED-NEXT: [[TMP34:%.*]] = zext i32 [[TMP33]] to i64 +; MASKED-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_VEC_FOUND_INDEX]], [[TMP34]] +; MASKED-NEXT: [[TMP36:%.*]] = trunc 
i64 [[TMP35]] to i32 +; MASKED-NEXT: br label [[MISMATCH_END]] +; MASKED: mismatch_loop_pre: +; MASKED-NEXT: br label [[MISMATCH_LOOP:%.*]] +; MASKED: mismatch_loop: +; MASKED-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP43:%.*]], [[MISMATCH_LOOP_INC:%.*]] ] +; MASKED-NEXT: [[TMP37:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64 +; MASKED-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP37]] +; MASKED-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1 +; MASKED-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP37]] +; MASKED-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1 +; MASKED-NEXT: [[TMP42:%.*]] = icmp eq i8 [[TMP39]], [[TMP41]] +; MASKED-NEXT: br i1 [[TMP42]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]] +; MASKED: mismatch_loop_inc: +; MASKED-NEXT: [[TMP43]] = add i32 [[MISMATCH_INDEX]], 1 +; MASKED-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]] +; MASKED-NEXT: br i1 [[TMP44]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]] +; MASKED: mismatch_end: +; MASKED-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VEC_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_VEC_LOOP_FOUND]] ] +; MASKED-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]] +; MASKED: while.cond: +; MASKED-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ] +; MASKED-NEXT: [[INC:%.*]] = add i32 [[LEN_ADDR]], 1 +; MASKED-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]] +; MASKED-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; MASKED: while.body: +; MASKED-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64 +; MASKED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; MASKED-NEXT: [[TMP45:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MASKED-NEXT: [[ARRAYIDX2:%.*]] = 
getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; MASKED-NEXT: [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; MASKED-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP45]], [[TMP46]] +; MASKED-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; MASKED: byte.compare: +; MASKED-NEXT: br label [[WHILE_END]] +; MASKED: while.end: +; MASKED-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ] +; MASKED-NEXT: ret i32 [[INC_LCSSA]] +; +entry: + br label %while.cond + +while.cond: + %len.addr = phi i32 [ %len, %entry ], [ %inc, %while.body ] + %inc = add i32 %len.addr, 1 + %cmp.not = icmp eq i32 %inc, %n + br i1 %cmp.not, label %while.end, label %while.body + +while.body: + %idxprom = zext i32 %inc to i64 + %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom + %0 = load i8, ptr %arrayidx + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom + %1 = load i8, ptr %arrayidx2 + %cmp.not2 = icmp eq i8 %0, %1 + br i1 %cmp.not2, label %while.cond, label %while.end + +while.end: + %inc.lcssa = phi i32 [ %inc, %while.body ], [ %inc, %while.cond ] + ret i32 %inc.lcssa +} + +define i32 @compare_bytes_signed_wrap(ptr %a, ptr %b, i32 %len, i32 %n) { +; CHECK-LABEL: define i32 @compare_bytes_signed_wrap( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1 +; CHECK-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]] +; CHECK: mismatch_min_it_check: +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]] +; CHECK: mismatch_mem_check: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]] +; CHECK-NEXT: 
[[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP7]], 12 +; CHECK-NEXT: [[TMP9:%.*]] = lshr i64 [[TMP11]], 12 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP8]], 12 +; CHECK-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP14]], 12 +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP_PREHEADER:%.*]], !prof [[PROF1]] +; CHECK: mismatch_vec_loop_preheader: +; CHECK-NEXT: br label [[MISMATCH_VECTOR_LOOP:%.*]] +; CHECK: mismatch_vec_loop: +; CHECK-NEXT: [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VECTOR_LOOP_PREHEADER]] ], [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ] +; CHECK-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] +; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] +; CHECK-NEXT: [[LHS_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] +; CHECK-NEXT: [[RHS_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: 
[[MISMATCH_CMP:%.*]] = call @llvm.vp.icmp.nxv16i8( [[LHS_LOAD]], [[RHS_LOAD]], metadata !"ne", shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1( [[MISMATCH_CMP]], i1 false, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] +; CHECK-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] +; CHECK: mismatch_vec_loop_inc: +; CHECK-NEXT: [[TMP23:%.*]] = zext i32 [[TMP19]] to i64 +; CHECK-NEXT: [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[MISMATCH_END:%.*]] +; CHECK: mismatch_vec_loop_found: +; CHECK-NEXT: [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ] +; CHECK-NEXT: [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ] +; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[FIRST1]] to i64 +; CHECK-NEXT: [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +; CHECK-NEXT: br label [[MISMATCH_END]] +; CHECK: mismatch_loop_pre: +; CHECK-NEXT: br label [[MISMATCH_LOOP:%.*]] +; CHECK: mismatch_loop: +; CHECK-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ] +; CHECK-NEXT: [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]] +; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1 +; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i8 [[TMP31]], 
[[TMP33]] +; CHECK-NEXT: br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]] +; CHECK: mismatch_loop_inc: +; CHECK-NEXT: [[TMP35]] = add nsw i32 [[MISMATCH_INDEX]], 1 +; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]] +; CHECK-NEXT: br i1 [[TMP36]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]] +; CHECK: mismatch_end: +; CHECK-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ] +; CHECK-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]] +; CHECK: while.cond: +; CHECK-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ] +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[LEN_ADDR]], 1 +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.body: +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP37]], [[TMP38]] +; CHECK-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; CHECK: byte.compare: +; CHECK-NEXT: br label [[WHILE_END]] +; CHECK: while.end: +; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ] +; CHECK-NEXT: ret i32 [[INC_LCSSA]] +; +; LMUL8-LABEL: define i32 @compare_bytes_signed_wrap( +; LMUL8-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; LMUL8-NEXT: entry: +; 
LMUL8-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1 +; LMUL8-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]] +; LMUL8: mismatch_min_it_check: +; LMUL8-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; LMUL8-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64 +; LMUL8-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]] +; LMUL8-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]] +; LMUL8: mismatch_mem_check: +; LMUL8-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]] +; LMUL8-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; LMUL8-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; LMUL8-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64 +; LMUL8-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] +; LMUL8-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; LMUL8-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 +; LMUL8-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; LMUL8-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP7]], 12 +; LMUL8-NEXT: [[TMP9:%.*]] = lshr i64 [[TMP11]], 12 +; LMUL8-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP8]], 12 +; LMUL8-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP14]], 12 +; LMUL8-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]] +; LMUL8-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]] +; LMUL8-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] +; LMUL8-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP_PREHEADER:%.*]], !prof [[PROF1]] +; LMUL8: mismatch_vec_loop_preheader: +; LMUL8-NEXT: br label [[MISMATCH_VECTOR_LOOP:%.*]] +; LMUL8: mismatch_vec_loop: +; LMUL8-NEXT: [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VECTOR_LOOP_PREHEADER]] ], [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ] +; LMUL8-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] +; LMUL8-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 64, i1 true) +; LMUL8-NEXT: [[TMP20:%.*]] = 
getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] +; LMUL8-NEXT: [[LHS_LOAD:%.*]] = call @llvm.vp.load.nxv64i8.p0(ptr [[TMP20]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] +; LMUL8-NEXT: [[RHS_LOAD:%.*]] = call @llvm.vp.load.nxv64i8.p0(ptr [[TMP21]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[MISMATCH_CMP:%.*]] = call @llvm.vp.icmp.nxv64i8( [[LHS_LOAD]], [[RHS_LOAD]], metadata !"ne", shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv64i1( [[MISMATCH_CMP]], i1 false, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] +; LMUL8-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] +; LMUL8: mismatch_vec_loop_inc: +; LMUL8-NEXT: [[TMP23:%.*]] = zext i32 [[TMP19]] to i64 +; LMUL8-NEXT: [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]] +; LMUL8-NEXT: [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]] +; LMUL8-NEXT: br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[MISMATCH_END:%.*]] +; LMUL8: mismatch_vec_loop_found: +; LMUL8-NEXT: [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ] +; LMUL8-NEXT: [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ] +; LMUL8-NEXT: [[TMP26:%.*]] = zext i32 [[FIRST1]] to i64 +; LMUL8-NEXT: [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP26]] +; LMUL8-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +; LMUL8-NEXT: br label [[MISMATCH_END]] +; LMUL8: mismatch_loop_pre: +; LMUL8-NEXT: br label [[MISMATCH_LOOP:%.*]] +; LMUL8: mismatch_loop: +; 
LMUL8-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ] +; LMUL8-NEXT: [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64 +; LMUL8-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]] +; LMUL8-NEXT: [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1 +; LMUL8-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]] +; LMUL8-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1 +; LMUL8-NEXT: [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]] +; LMUL8-NEXT: br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]] +; LMUL8: mismatch_loop_inc: +; LMUL8-NEXT: [[TMP35]] = add nsw i32 [[MISMATCH_INDEX]], 1 +; LMUL8-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]] +; LMUL8-NEXT: br i1 [[TMP36]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]] +; LMUL8: mismatch_end: +; LMUL8-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ] +; LMUL8-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]] +; LMUL8: while.cond: +; LMUL8-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ] +; LMUL8-NEXT: [[INC:%.*]] = add nsw i32 [[LEN_ADDR]], 1 +; LMUL8-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]] +; LMUL8-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; LMUL8: while.body: +; LMUL8-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64 +; LMUL8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; LMUL8-NEXT: [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; LMUL8-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; LMUL8-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; LMUL8-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP37]], [[TMP38]] +; 
LMUL8-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; LMUL8: byte.compare: +; LMUL8-NEXT: br label [[WHILE_END]] +; LMUL8: while.end: +; LMUL8-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ] +; LMUL8-NEXT: ret i32 [[INC_LCSSA]] +; +; LOOP-DEL-LABEL: define i32 @compare_bytes_signed_wrap( +; LOOP-DEL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; LOOP-DEL-NEXT: entry: +; LOOP-DEL-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1 +; LOOP-DEL-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; LOOP-DEL-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64 +; LOOP-DEL-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]] +; LOOP-DEL-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]] +; LOOP-DEL: mismatch_mem_check: +; LOOP-DEL-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]] +; LOOP-DEL-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; LOOP-DEL-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; LOOP-DEL-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64 +; LOOP-DEL-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] +; LOOP-DEL-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; LOOP-DEL-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 +; LOOP-DEL-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; LOOP-DEL-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP7]], 12 +; LOOP-DEL-NEXT: [[TMP9:%.*]] = lshr i64 [[TMP11]], 12 +; LOOP-DEL-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP8]], 12 +; LOOP-DEL-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP14]], 12 +; LOOP-DEL-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]] +; LOOP-DEL-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]] +; LOOP-DEL-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] +; LOOP-DEL-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP:%.*]], !prof 
[[PROF1]] +; LOOP-DEL: mismatch_vec_loop: +; LOOP-DEL-NEXT: [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ], [ [[TMP1]], [[MISMATCH_MEM_CHECK]] ] +; LOOP-DEL-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] +; LOOP-DEL-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) +; LOOP-DEL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] +; LOOP-DEL-NEXT: [[LHS_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] +; LOOP-DEL-NEXT: [[RHS_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[MISMATCH_CMP:%.*]] = call @llvm.vp.icmp.nxv16i8( [[LHS_LOAD]], [[RHS_LOAD]], metadata !"ne", shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1( [[MISMATCH_CMP]], i1 false, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] +; LOOP-DEL-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] +; LOOP-DEL: mismatch_vec_loop_inc: +; LOOP-DEL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP19]] to i64 +; LOOP-DEL-NEXT: [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]] +; LOOP-DEL-NEXT: [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]] +; LOOP-DEL-NEXT: br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[WHILE_END:%.*]] +; LOOP-DEL: mismatch_vec_loop_found: +; LOOP-DEL-NEXT: [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ] 
+; LOOP-DEL-NEXT: [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ] +; LOOP-DEL-NEXT: [[TMP26:%.*]] = zext i32 [[FIRST1]] to i64 +; LOOP-DEL-NEXT: [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP26]] +; LOOP-DEL-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +; LOOP-DEL-NEXT: br label [[WHILE_END]] +; LOOP-DEL: mismatch_loop_pre: +; LOOP-DEL-NEXT: br label [[MISMATCH_LOOP:%.*]] +; LOOP-DEL: mismatch_loop: +; LOOP-DEL-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ] +; LOOP-DEL-NEXT: [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64 +; LOOP-DEL-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]] +; LOOP-DEL-NEXT: [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1 +; LOOP-DEL-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]] +; LOOP-DEL-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1 +; LOOP-DEL-NEXT: [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]] +; LOOP-DEL-NEXT: br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[WHILE_END]] +; LOOP-DEL: mismatch_loop_inc: +; LOOP-DEL-NEXT: [[TMP35]] = add nsw i32 [[MISMATCH_INDEX]], 1 +; LOOP-DEL-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]] +; LOOP-DEL-NEXT: br i1 [[TMP36]], label [[WHILE_END]], label [[MISMATCH_LOOP]] +; LOOP-DEL: while.end: +; LOOP-DEL-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ] +; LOOP-DEL-NEXT: ret i32 [[MISMATCH_RESULT]] +; +; MASKED-LABEL: define i32 @compare_bytes_signed_wrap( +; MASKED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; MASKED-NEXT: entry: +; MASKED-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1 +; MASKED-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]] +; MASKED: mismatch_min_it_check: +; MASKED-NEXT: [[TMP1:%.*]] = 
zext i32 [[TMP0]] to i64 +; MASKED-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64 +; MASKED-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]] +; MASKED-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]] +; MASKED: mismatch_mem_check: +; MASKED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]] +; MASKED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; MASKED-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; MASKED-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64 +; MASKED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] +; MASKED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; MASKED-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; MASKED-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; MASKED-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP7]], 12 +; MASKED-NEXT: [[TMP13:%.*]] = lshr i64 [[TMP10]], 12 +; MASKED-NEXT: [[TMP14:%.*]] = lshr i64 [[TMP6]], 12 +; MASKED-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP11]], 12 +; MASKED-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]] +; MASKED-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]] +; MASKED-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] +; MASKED-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VEC_LOOP_PREHEADER:%.*]], !prof [[PROF1]] +; MASKED: mismatch_vec_loop_preheader: +; MASKED-NEXT: [[TMP19:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]]) +; MASKED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; MASKED-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16 +; MASKED-NEXT: br label [[MISMATCH_VEC_LOOP:%.*]] +; MASKED: mismatch_vec_loop: +; MASKED-NEXT: [[MISMATCH_VEC_LOOP_PRED:%.*]] = phi [ [[TMP19]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_VEC_LOOP_INC:%.*]] ] +; MASKED-NEXT: [[MISMATCH_VEC_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_VEC_LOOP_INC]] ] +; 
MASKED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VEC_INDEX]] +; MASKED-NEXT: [[TMP23:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, [[MISMATCH_VEC_LOOP_PRED]], zeroinitializer) +; MASKED-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VEC_INDEX]] +; MASKED-NEXT: [[TMP25:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, [[MISMATCH_VEC_LOOP_PRED]], zeroinitializer) +; MASKED-NEXT: [[TMP26:%.*]] = icmp ne [[TMP23]], [[TMP25]] +; MASKED-NEXT: [[TMP27:%.*]] = select [[MISMATCH_VEC_LOOP_PRED]], [[TMP26]], zeroinitializer +; MASKED-NEXT: [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP27]]) +; MASKED-NEXT: br i1 [[TMP28]], label [[MISMATCH_VEC_LOOP_FOUND:%.*]], label [[MISMATCH_VEC_LOOP_INC]] +; MASKED: mismatch_vec_loop_inc: +; MASKED-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_VEC_INDEX]], [[TMP21]] +; MASKED-NEXT: [[TMP30]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]]) +; MASKED-NEXT: [[TMP31:%.*]] = extractelement [[TMP30]], i64 0 +; MASKED-NEXT: br i1 [[TMP31]], label [[MISMATCH_VEC_LOOP]], label [[MISMATCH_END:%.*]] +; MASKED: mismatch_vec_loop_found: +; MASKED-NEXT: [[MISMATCH_VEC_FOUND_PRED:%.*]] = phi [ [[TMP27]], [[MISMATCH_VEC_LOOP]] ] +; MASKED-NEXT: [[MISMATCH_VEC_LAST_LOOP_PRED:%.*]] = phi [ [[MISMATCH_VEC_LOOP_PRED]], [[MISMATCH_VEC_LOOP]] ] +; MASKED-NEXT: [[MISMATCH_VEC_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_VEC_INDEX]], [[MISMATCH_VEC_LOOP]] ] +; MASKED-NEXT: [[TMP32:%.*]] = and [[MISMATCH_VEC_LAST_LOOP_PRED]], [[MISMATCH_VEC_FOUND_PRED]] +; MASKED-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1( [[TMP32]], i1 true) +; MASKED-NEXT: [[TMP34:%.*]] = zext i32 [[TMP33]] to i64 +; MASKED-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_VEC_FOUND_INDEX]], [[TMP34]] +; MASKED-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32 +; MASKED-NEXT: br label [[MISMATCH_END]] +; MASKED: mismatch_loop_pre: +; 
MASKED-NEXT: br label [[MISMATCH_LOOP:%.*]] +; MASKED: mismatch_loop: +; MASKED-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP43:%.*]], [[MISMATCH_LOOP_INC:%.*]] ] +; MASKED-NEXT: [[TMP37:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64 +; MASKED-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP37]] +; MASKED-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1 +; MASKED-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP37]] +; MASKED-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1 +; MASKED-NEXT: [[TMP42:%.*]] = icmp eq i8 [[TMP39]], [[TMP41]] +; MASKED-NEXT: br i1 [[TMP42]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]] +; MASKED: mismatch_loop_inc: +; MASKED-NEXT: [[TMP43]] = add nsw i32 [[MISMATCH_INDEX]], 1 +; MASKED-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]] +; MASKED-NEXT: br i1 [[TMP44]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]] +; MASKED: mismatch_end: +; MASKED-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VEC_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_VEC_LOOP_FOUND]] ] +; MASKED-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]] +; MASKED: while.cond: +; MASKED-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ] +; MASKED-NEXT: [[INC:%.*]] = add nsw i32 [[LEN_ADDR]], 1 +; MASKED-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]] +; MASKED-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; MASKED: while.body: +; MASKED-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64 +; MASKED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; MASKED-NEXT: [[TMP45:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MASKED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; MASKED-NEXT: [[TMP46:%.*]] = load i8, ptr 
[[ARRAYIDX2]], align 1 +; MASKED-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP45]], [[TMP46]] +; MASKED-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; MASKED: byte.compare: +; MASKED-NEXT: br label [[WHILE_END]] +; MASKED: while.end: +; MASKED-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ] +; MASKED-NEXT: ret i32 [[INC_LCSSA]] +; +; NO-TRANSFORM-LABEL: define i32 @compare_bytes_signed_wrap( +; NO-TRANSFORM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) { +; NO-TRANSFORM-NEXT: entry: +; NO-TRANSFORM-NEXT: br label [[WHILE_COND:%.*]] +; NO-TRANSFORM: while.cond: +; NO-TRANSFORM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ] +; NO-TRANSFORM-NEXT: [[INC]] = add nsw i32 [[LEN_ADDR]], 1 +; NO-TRANSFORM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; NO-TRANSFORM: while.body: +; NO-TRANSFORM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64 +; NO-TRANSFORM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; NO-TRANSFORM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; NO-TRANSFORM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; NO-TRANSFORM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; NO-TRANSFORM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]] +; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; NO-TRANSFORM: while.end: +; NO-TRANSFORM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ] +; NO-TRANSFORM-NEXT: ret i32 [[INC_LCSSA]] +entry: + br label %while.cond + +while.cond: + %len.addr = phi i32 [ %len, %entry ], [ %inc, %while.body ] + %inc = add nsw i32 %len.addr, 1 + %cmp.not = icmp eq i32 %inc, %n + br i1 
%cmp.not, label %while.end, label %while.body + +while.body: + %idxprom = zext i32 %inc to i64 + %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom + %0 = load i8, ptr %arrayidx + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom + %1 = load i8, ptr %arrayidx2 + %cmp.not2 = icmp eq i8 %0, %1 + br i1 %cmp.not2, label %while.cond, label %while.end + +while.end: + %inc.lcssa = phi i32 [ %inc, %while.body ], [ %inc, %while.cond ] + ret i32 %inc.lcssa +} + + +define i32 @compare_bytes_simple_end_ne_found(ptr %a, ptr %b, ptr %c, ptr %d, i32 %len, i32 %n) { +; CHECK-LABEL: define i32 @compare_bytes_simple_end_ne_found( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1 +; CHECK-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]] +; CHECK: mismatch_min_it_check: +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]] +; CHECK: mismatch_mem_check: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP7]], 12 +; CHECK-NEXT: [[TMP9:%.*]] = lshr i64 [[TMP11]], 12 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP8]], 12 +; CHECK-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP14]], 12 +; CHECK-NEXT: [[TMP16:%.*]] 
= icmp ne i64 [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP_PREHEADER:%.*]], !prof [[PROF1]] +; CHECK: mismatch_vec_loop_preheader: +; CHECK-NEXT: br label [[MISMATCH_VECTOR_LOOP:%.*]] +; CHECK: mismatch_vec_loop: +; CHECK-NEXT: [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VECTOR_LOOP_PREHEADER]] ], [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ] +; CHECK-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] +; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] +; CHECK-NEXT: [[LHS_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] +; CHECK-NEXT: [[RHS_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[MISMATCH_CMP:%.*]] = call @llvm.vp.icmp.nxv16i8( [[LHS_LOAD]], [[RHS_LOAD]], metadata !"ne", shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1( [[MISMATCH_CMP]], i1 false, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] +; CHECK-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] +; CHECK: mismatch_vec_loop_inc: +; CHECK-NEXT: [[TMP23:%.*]] = zext i32 [[TMP19]] to i64 +; CHECK-NEXT: [[TMP24]] = add 
nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[MISMATCH_END:%.*]] +; CHECK: mismatch_vec_loop_found: +; CHECK-NEXT: [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ] +; CHECK-NEXT: [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ] +; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[FIRST1]] to i64 +; CHECK-NEXT: [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +; CHECK-NEXT: br label [[MISMATCH_END]] +; CHECK: mismatch_loop_pre: +; CHECK-NEXT: br label [[MISMATCH_LOOP:%.*]] +; CHECK: mismatch_loop: +; CHECK-NEXT: [[MISMATCH_INDEX3:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ] +; CHECK-NEXT: [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX3]] to i64 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]] +; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1 +; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]] +; CHECK-NEXT: br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]] +; CHECK: mismatch_loop_inc: +; CHECK-NEXT: [[TMP35]] = add i32 [[MISMATCH_INDEX3]], 1 +; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]] +; CHECK-NEXT: br i1 [[TMP36]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]] +; CHECK: mismatch_end: +; CHECK-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX3]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ] +; CHECK-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]] +; CHECK: while.cond: +; CHECK-NEXT: 
[[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ] +; CHECK-NEXT: [[INC:%.*]] = add i32 [[LEN_ADDR]], 1 +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.body: +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP37]], [[TMP38]] +; CHECK-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_FOUND:%.*]] +; CHECK: while.found: +; CHECK-NEXT: [[MISMATCH_INDEX1:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ] +; CHECK-NEXT: [[FOUND_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ], [ [[C]], [[BYTE_COMPARE]] ] +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: byte.compare: +; CHECK-NEXT: [[TMP39:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]] +; CHECK-NEXT: br i1 [[TMP39]], label [[WHILE_END]], label [[WHILE_FOUND]] +; CHECK: while.end: +; CHECK-NEXT: [[MISMATCH_INDEX2:%.*]] = phi i32 [ [[N]], [[WHILE_COND]] ], [ [[N]], [[BYTE_COMPARE]] ] +; CHECK-NEXT: [[END_PTR:%.*]] = phi ptr [ [[D]], [[WHILE_COND]] ], [ [[D]], [[BYTE_COMPARE]] ] +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[MISMATCH_INDEX1]], [[WHILE_FOUND]] ], [ [[MISMATCH_INDEX2]], [[WHILE_END]] ] +; CHECK-NEXT: [[STORE_PTR:%.*]] = phi ptr [ [[END_PTR]], [[WHILE_END]] ], [ [[FOUND_PTR]], [[WHILE_FOUND]] ] +; CHECK-NEXT: store i32 [[MISMATCH_INDEX]], ptr [[STORE_PTR]], align 4 +; CHECK-NEXT: ret i32 [[MISMATCH_INDEX]] +; +; LMUL8-LABEL: define i32 
@compare_bytes_simple_end_ne_found( +; LMUL8-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; LMUL8-NEXT: entry: +; LMUL8-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1 +; LMUL8-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]] +; LMUL8: mismatch_min_it_check: +; LMUL8-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; LMUL8-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64 +; LMUL8-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]] +; LMUL8-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]] +; LMUL8: mismatch_mem_check: +; LMUL8-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]] +; LMUL8-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; LMUL8-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; LMUL8-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64 +; LMUL8-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] +; LMUL8-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; LMUL8-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 +; LMUL8-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; LMUL8-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP7]], 12 +; LMUL8-NEXT: [[TMP9:%.*]] = lshr i64 [[TMP11]], 12 +; LMUL8-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP8]], 12 +; LMUL8-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP14]], 12 +; LMUL8-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]] +; LMUL8-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]] +; LMUL8-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] +; LMUL8-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP_PREHEADER:%.*]], !prof [[PROF1]] +; LMUL8: mismatch_vec_loop_preheader: +; LMUL8-NEXT: br label [[MISMATCH_VECTOR_LOOP:%.*]] +; LMUL8: mismatch_vec_loop: +; LMUL8-NEXT: [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VECTOR_LOOP_PREHEADER]] ], [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ] +; LMUL8-NEXT: [[AVL:%.*]] = sub nuw nsw i64 
[[TMP2]], [[MISMATCH_VECTOR_INDEX]] +; LMUL8-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 64, i1 true) +; LMUL8-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] +; LMUL8-NEXT: [[LHS_LOAD:%.*]] = call @llvm.vp.load.nxv64i8.p0(ptr [[TMP20]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] +; LMUL8-NEXT: [[RHS_LOAD:%.*]] = call @llvm.vp.load.nxv64i8.p0(ptr [[TMP21]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[MISMATCH_CMP:%.*]] = call @llvm.vp.icmp.nxv64i8( [[LHS_LOAD]], [[RHS_LOAD]], metadata !"ne", shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv64i1( [[MISMATCH_CMP]], i1 false, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] +; LMUL8-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] +; LMUL8: mismatch_vec_loop_inc: +; LMUL8-NEXT: [[TMP23:%.*]] = zext i32 [[TMP19]] to i64 +; LMUL8-NEXT: [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]] +; LMUL8-NEXT: [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]] +; LMUL8-NEXT: br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[MISMATCH_END:%.*]] +; LMUL8: mismatch_vec_loop_found: +; LMUL8-NEXT: [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ] +; LMUL8-NEXT: [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ] +; LMUL8-NEXT: [[TMP26:%.*]] = zext i32 [[FIRST1]] to i64 +; LMUL8-NEXT: [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP26]] +; LMUL8-NEXT: 
[[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +; LMUL8-NEXT: br label [[MISMATCH_END]] +; LMUL8: mismatch_loop_pre: +; LMUL8-NEXT: br label [[MISMATCH_LOOP:%.*]] +; LMUL8: mismatch_loop: +; LMUL8-NEXT: [[MISMATCH_INDEX3:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ] +; LMUL8-NEXT: [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX3]] to i64 +; LMUL8-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]] +; LMUL8-NEXT: [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1 +; LMUL8-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]] +; LMUL8-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1 +; LMUL8-NEXT: [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]] +; LMUL8-NEXT: br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]] +; LMUL8: mismatch_loop_inc: +; LMUL8-NEXT: [[TMP35]] = add i32 [[MISMATCH_INDEX3]], 1 +; LMUL8-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]] +; LMUL8-NEXT: br i1 [[TMP36]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]] +; LMUL8: mismatch_end: +; LMUL8-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX3]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ] +; LMUL8-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]] +; LMUL8: while.cond: +; LMUL8-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ] +; LMUL8-NEXT: [[INC:%.*]] = add i32 [[LEN_ADDR]], 1 +; LMUL8-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]] +; LMUL8-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; LMUL8: while.body: +; LMUL8-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64 +; LMUL8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; LMUL8-NEXT: [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; LMUL8-NEXT: [[ARRAYIDX2:%.*]] = 
getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; LMUL8-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; LMUL8-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP37]], [[TMP38]] +; LMUL8-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_FOUND:%.*]] +; LMUL8: while.found: +; LMUL8-NEXT: [[MISMATCH_INDEX1:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ] +; LMUL8-NEXT: [[FOUND_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ], [ [[C]], [[BYTE_COMPARE]] ] +; LMUL8-NEXT: br label [[END:%.*]] +; LMUL8: byte.compare: +; LMUL8-NEXT: [[TMP39:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]] +; LMUL8-NEXT: br i1 [[TMP39]], label [[WHILE_END]], label [[WHILE_FOUND]] +; LMUL8: while.end: +; LMUL8-NEXT: [[MISMATCH_INDEX2:%.*]] = phi i32 [ [[N]], [[WHILE_COND]] ], [ [[N]], [[BYTE_COMPARE]] ] +; LMUL8-NEXT: [[END_PTR:%.*]] = phi ptr [ [[D]], [[WHILE_COND]] ], [ [[D]], [[BYTE_COMPARE]] ] +; LMUL8-NEXT: br label [[END]] +; LMUL8: end: +; LMUL8-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[MISMATCH_INDEX1]], [[WHILE_FOUND]] ], [ [[MISMATCH_INDEX2]], [[WHILE_END]] ] +; LMUL8-NEXT: [[STORE_PTR:%.*]] = phi ptr [ [[END_PTR]], [[WHILE_END]] ], [ [[FOUND_PTR]], [[WHILE_FOUND]] ] +; LMUL8-NEXT: store i32 [[MISMATCH_INDEX]], ptr [[STORE_PTR]], align 4 +; LMUL8-NEXT: ret i32 [[MISMATCH_INDEX]] +; +; LOOP-DEL-LABEL: define i32 @compare_bytes_simple_end_ne_found( +; LOOP-DEL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; LOOP-DEL-NEXT: entry: +; LOOP-DEL-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1 +; LOOP-DEL-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; LOOP-DEL-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64 +; LOOP-DEL-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]] +; LOOP-DEL-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]] +; LOOP-DEL: mismatch_mem_check: +; LOOP-DEL-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr 
[[A]], i64 [[TMP1]] +; LOOP-DEL-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; LOOP-DEL-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; LOOP-DEL-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64 +; LOOP-DEL-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] +; LOOP-DEL-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; LOOP-DEL-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 +; LOOP-DEL-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; LOOP-DEL-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP7]], 12 +; LOOP-DEL-NEXT: [[TMP9:%.*]] = lshr i64 [[TMP11]], 12 +; LOOP-DEL-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP8]], 12 +; LOOP-DEL-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP14]], 12 +; LOOP-DEL-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]] +; LOOP-DEL-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]] +; LOOP-DEL-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] +; LOOP-DEL-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP:%.*]], !prof [[PROF1]] +; LOOP-DEL: mismatch_vec_loop: +; LOOP-DEL-NEXT: [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ], [ [[TMP1]], [[MISMATCH_MEM_CHECK]] ] +; LOOP-DEL-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] +; LOOP-DEL-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) +; LOOP-DEL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] +; LOOP-DEL-NEXT: [[LHS_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] +; LOOP-DEL-NEXT: [[RHS_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: 
[[MISMATCH_CMP:%.*]] = call @llvm.vp.icmp.nxv16i8( [[LHS_LOAD]], [[RHS_LOAD]], metadata !"ne", shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1( [[MISMATCH_CMP]], i1 false, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] +; LOOP-DEL-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] +; LOOP-DEL: mismatch_vec_loop_inc: +; LOOP-DEL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP19]] to i64 +; LOOP-DEL-NEXT: [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]] +; LOOP-DEL-NEXT: [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]] +; LOOP-DEL-NEXT: br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[BYTE_COMPARE:%.*]] +; LOOP-DEL: mismatch_vec_loop_found: +; LOOP-DEL-NEXT: [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ] +; LOOP-DEL-NEXT: [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ] +; LOOP-DEL-NEXT: [[TMP26:%.*]] = zext i32 [[FIRST1]] to i64 +; LOOP-DEL-NEXT: [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP26]] +; LOOP-DEL-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +; LOOP-DEL-NEXT: br label [[BYTE_COMPARE]] +; LOOP-DEL: mismatch_loop_pre: +; LOOP-DEL-NEXT: br label [[MISMATCH_LOOP:%.*]] +; LOOP-DEL: mismatch_loop: +; LOOP-DEL-NEXT: [[MISMATCH_INDEX3:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ] +; LOOP-DEL-NEXT: [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX3]] to i64 +; LOOP-DEL-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]] +; LOOP-DEL-NEXT: [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1 +; LOOP-DEL-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]] +; LOOP-DEL-NEXT: [[TMP33:%.*]] = load i8, ptr 
[[TMP32]], align 1 +; LOOP-DEL-NEXT: [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]] +; LOOP-DEL-NEXT: br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[BYTE_COMPARE]] +; LOOP-DEL: mismatch_loop_inc: +; LOOP-DEL-NEXT: [[TMP35]] = add i32 [[MISMATCH_INDEX3]], 1 +; LOOP-DEL-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]] +; LOOP-DEL-NEXT: br i1 [[TMP36]], label [[BYTE_COMPARE]], label [[MISMATCH_LOOP]] +; LOOP-DEL: byte.compare: +; LOOP-DEL-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX3]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ] +; LOOP-DEL-NEXT: [[TMP37:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]] +; LOOP-DEL-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TMP37]], i32 [[N]], i32 [[MISMATCH_RESULT]] +; LOOP-DEL-NEXT: [[SPEC_SELECT4:%.*]] = select i1 [[TMP37]], ptr [[D]], ptr [[C]] +; LOOP-DEL-NEXT: store i32 [[SPEC_SELECT]], ptr [[SPEC_SELECT4]], align 4 +; LOOP-DEL-NEXT: ret i32 [[SPEC_SELECT]] +; +; MASKED-LABEL: define i32 @compare_bytes_simple_end_ne_found( +; MASKED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; MASKED-NEXT: entry: +; MASKED-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1 +; MASKED-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]] +; MASKED: mismatch_min_it_check: +; MASKED-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; MASKED-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64 +; MASKED-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]] +; MASKED-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]] +; MASKED: mismatch_mem_check: +; MASKED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]] +; MASKED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; MASKED-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; MASKED-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64 +; MASKED-NEXT: [[TMP8:%.*]] = getelementptr 
i8, ptr [[A]], i64 [[TMP2]] +; MASKED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; MASKED-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; MASKED-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; MASKED-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP7]], 12 +; MASKED-NEXT: [[TMP13:%.*]] = lshr i64 [[TMP10]], 12 +; MASKED-NEXT: [[TMP14:%.*]] = lshr i64 [[TMP6]], 12 +; MASKED-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP11]], 12 +; MASKED-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]] +; MASKED-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]] +; MASKED-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] +; MASKED-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VEC_LOOP_PREHEADER:%.*]], !prof [[PROF1]] +; MASKED: mismatch_vec_loop_preheader: +; MASKED-NEXT: [[TMP19:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]]) +; MASKED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; MASKED-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16 +; MASKED-NEXT: br label [[MISMATCH_VEC_LOOP:%.*]] +; MASKED: mismatch_vec_loop: +; MASKED-NEXT: [[MISMATCH_VEC_LOOP_PRED:%.*]] = phi [ [[TMP19]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_VEC_LOOP_INC:%.*]] ] +; MASKED-NEXT: [[MISMATCH_VEC_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_VEC_LOOP_INC]] ] +; MASKED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VEC_INDEX]] +; MASKED-NEXT: [[TMP23:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, [[MISMATCH_VEC_LOOP_PRED]], zeroinitializer) +; MASKED-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VEC_INDEX]] +; MASKED-NEXT: [[TMP25:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, [[MISMATCH_VEC_LOOP_PRED]], zeroinitializer) +; MASKED-NEXT: [[TMP26:%.*]] = icmp ne [[TMP23]], [[TMP25]] +; MASKED-NEXT: [[TMP27:%.*]] = select [[MISMATCH_VEC_LOOP_PRED]], [[TMP26]], 
zeroinitializer +; MASKED-NEXT: [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP27]]) +; MASKED-NEXT: br i1 [[TMP28]], label [[MISMATCH_VEC_LOOP_FOUND:%.*]], label [[MISMATCH_VEC_LOOP_INC]] +; MASKED: mismatch_vec_loop_inc: +; MASKED-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_VEC_INDEX]], [[TMP21]] +; MASKED-NEXT: [[TMP30]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]]) +; MASKED-NEXT: [[TMP31:%.*]] = extractelement [[TMP30]], i64 0 +; MASKED-NEXT: br i1 [[TMP31]], label [[MISMATCH_VEC_LOOP]], label [[MISMATCH_END:%.*]] +; MASKED: mismatch_vec_loop_found: +; MASKED-NEXT: [[MISMATCH_VEC_FOUND_PRED:%.*]] = phi [ [[TMP27]], [[MISMATCH_VEC_LOOP]] ] +; MASKED-NEXT: [[MISMATCH_VEC_LAST_LOOP_PRED:%.*]] = phi [ [[MISMATCH_VEC_LOOP_PRED]], [[MISMATCH_VEC_LOOP]] ] +; MASKED-NEXT: [[MISMATCH_VEC_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_VEC_INDEX]], [[MISMATCH_VEC_LOOP]] ] +; MASKED-NEXT: [[TMP32:%.*]] = and [[MISMATCH_VEC_LAST_LOOP_PRED]], [[MISMATCH_VEC_FOUND_PRED]] +; MASKED-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1( [[TMP32]], i1 true) +; MASKED-NEXT: [[TMP34:%.*]] = zext i32 [[TMP33]] to i64 +; MASKED-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_VEC_FOUND_INDEX]], [[TMP34]] +; MASKED-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32 +; MASKED-NEXT: br label [[MISMATCH_END]] +; MASKED: mismatch_loop_pre: +; MASKED-NEXT: br label [[MISMATCH_LOOP:%.*]] +; MASKED: mismatch_loop: +; MASKED-NEXT: [[MISMATCH_INDEX3:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP43:%.*]], [[MISMATCH_LOOP_INC:%.*]] ] +; MASKED-NEXT: [[TMP37:%.*]] = zext i32 [[MISMATCH_INDEX3]] to i64 +; MASKED-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP37]] +; MASKED-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1 +; MASKED-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP37]] +; MASKED-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1 +; MASKED-NEXT: [[TMP42:%.*]] = icmp 
eq i8 [[TMP39]], [[TMP41]] +; MASKED-NEXT: br i1 [[TMP42]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]] +; MASKED: mismatch_loop_inc: +; MASKED-NEXT: [[TMP43]] = add i32 [[MISMATCH_INDEX3]], 1 +; MASKED-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]] +; MASKED-NEXT: br i1 [[TMP44]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]] +; MASKED: mismatch_end: +; MASKED-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX3]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VEC_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_VEC_LOOP_FOUND]] ] +; MASKED-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]] +; MASKED: while.cond: +; MASKED-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ] +; MASKED-NEXT: [[INC:%.*]] = add i32 [[LEN_ADDR]], 1 +; MASKED-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]] +; MASKED-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; MASKED: while.body: +; MASKED-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64 +; MASKED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; MASKED-NEXT: [[TMP45:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MASKED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; MASKED-NEXT: [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; MASKED-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP45]], [[TMP46]] +; MASKED-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_FOUND:%.*]] +; MASKED: while.found: +; MASKED-NEXT: [[MISMATCH_INDEX1:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ] +; MASKED-NEXT: [[FOUND_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ], [ [[C]], [[BYTE_COMPARE]] ] +; MASKED-NEXT: br label [[END:%.*]] +; MASKED: byte.compare: +; MASKED-NEXT: [[TMP47:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]] +; MASKED-NEXT: br i1 [[TMP47]], label 
[[WHILE_END]], label [[WHILE_FOUND]] +; MASKED: while.end: +; MASKED-NEXT: [[MISMATCH_INDEX2:%.*]] = phi i32 [ [[N]], [[WHILE_COND]] ], [ [[N]], [[BYTE_COMPARE]] ] +; MASKED-NEXT: [[END_PTR:%.*]] = phi ptr [ [[D]], [[WHILE_COND]] ], [ [[D]], [[BYTE_COMPARE]] ] +; MASKED-NEXT: br label [[END]] +; MASKED: end: +; MASKED-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[MISMATCH_INDEX1]], [[WHILE_FOUND]] ], [ [[MISMATCH_INDEX2]], [[WHILE_END]] ] +; MASKED-NEXT: [[STORE_PTR:%.*]] = phi ptr [ [[END_PTR]], [[WHILE_END]] ], [ [[FOUND_PTR]], [[WHILE_FOUND]] ] +; MASKED-NEXT: store i32 [[MISMATCH_INDEX]], ptr [[STORE_PTR]], align 4 +; MASKED-NEXT: ret i32 [[MISMATCH_INDEX]] +; +; NO-TRANSFORM-LABEL: define i32 @compare_bytes_simple_end_ne_found( +; NO-TRANSFORM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) { +; NO-TRANSFORM-NEXT: entry: +; NO-TRANSFORM-NEXT: br label [[WHILE_COND:%.*]] +; NO-TRANSFORM: while.cond: +; NO-TRANSFORM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ] +; NO-TRANSFORM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1 +; NO-TRANSFORM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; NO-TRANSFORM: while.body: +; NO-TRANSFORM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64 +; NO-TRANSFORM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; NO-TRANSFORM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; NO-TRANSFORM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; NO-TRANSFORM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; NO-TRANSFORM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]] +; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_FOUND:%.*]] +; NO-TRANSFORM: while.found: +; NO-TRANSFORM-NEXT: [[MISMATCH_INDEX1:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ] +; 
NO-TRANSFORM-NEXT: [[FOUND_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ] +; NO-TRANSFORM-NEXT: br label [[END:%.*]] +; NO-TRANSFORM: while.end: +; NO-TRANSFORM-NEXT: [[MISMATCH_INDEX2:%.*]] = phi i32 [ [[N]], [[WHILE_COND]] ] +; NO-TRANSFORM-NEXT: [[END_PTR:%.*]] = phi ptr [ [[D]], [[WHILE_COND]] ] +; NO-TRANSFORM-NEXT: br label [[END]] +; NO-TRANSFORM: end: +; NO-TRANSFORM-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[MISMATCH_INDEX1]], [[WHILE_FOUND]] ], [ [[MISMATCH_INDEX2]], [[WHILE_END]] ] +; NO-TRANSFORM-NEXT: [[STORE_PTR:%.*]] = phi ptr [ [[END_PTR]], [[WHILE_END]] ], [ [[FOUND_PTR]], [[WHILE_FOUND]] ] +; NO-TRANSFORM-NEXT: store i32 [[MISMATCH_INDEX]], ptr [[STORE_PTR]], align 4 +; NO-TRANSFORM-NEXT: ret i32 [[MISMATCH_INDEX]] +entry: + br label %while.cond + +while.cond: + %len.addr = phi i32 [ %len, %entry ], [ %inc, %while.body ] + %inc = add i32 %len.addr, 1 + %cmp.not = icmp eq i32 %inc, %n + br i1 %cmp.not, label %while.end, label %while.body + +while.body: + %idxprom = zext i32 %inc to i64 + %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom + %0 = load i8, ptr %arrayidx + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom + %1 = load i8, ptr %arrayidx2 + %cmp.not2 = icmp eq i8 %0, %1 + br i1 %cmp.not2, label %while.cond, label %while.found + +while.found: + %mismatch_index1 = phi i32 [ %inc, %while.body ] + %found_ptr = phi ptr [ %c, %while.body ] + br label %end + +while.end: + %mismatch_index2 = phi i32 [ %n, %while.cond ] + %end_ptr = phi ptr [ %d, %while.cond ] + br label %end + +end: + %mismatch_index = phi i32 [ %mismatch_index1, %while.found ], [ %mismatch_index2, %while.end ] + %store_ptr = phi ptr [ %end_ptr, %while.end ], [ %found_ptr, %while.found ] + store i32 %mismatch_index, ptr %store_ptr + ret i32 %mismatch_index +} + + + +define i32 @compare_bytes_extra_cmp(ptr %a, ptr %b, i32 %len, i32 %n, i32 %x) { +; CHECK-LABEL: define i32 @compare_bytes_extra_cmp( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 
[[N:%.*]], i32 [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP_X:%.*]] = icmp ult i32 [[N]], [[X]] +; CHECK-NEXT: br i1 [[CMP_X]], label [[PH:%.*]], label [[WHILE_END:%.*]] +; CHECK: ph: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1 +; CHECK-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]] +; CHECK: mismatch_min_it_check: +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]] +; CHECK: mismatch_mem_check: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP7]], 12 +; CHECK-NEXT: [[TMP9:%.*]] = lshr i64 [[TMP11]], 12 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP8]], 12 +; CHECK-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP14]], 12 +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP_PREHEADER:%.*]], !prof [[PROF1]] +; CHECK: mismatch_vec_loop_preheader: +; CHECK-NEXT: br label [[MISMATCH_VECTOR_LOOP:%.*]] +; CHECK: mismatch_vec_loop: +; CHECK-NEXT: [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VECTOR_LOOP_PREHEADER]] ], [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ] +; CHECK-NEXT: 
[[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] +; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] +; CHECK-NEXT: [[LHS_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] +; CHECK-NEXT: [[RHS_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[MISMATCH_CMP:%.*]] = call @llvm.vp.icmp.nxv16i8( [[LHS_LOAD]], [[RHS_LOAD]], metadata !"ne", shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1( [[MISMATCH_CMP]], i1 false, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] +; CHECK-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] +; CHECK: mismatch_vec_loop_inc: +; CHECK-NEXT: [[TMP23:%.*]] = zext i32 [[TMP19]] to i64 +; CHECK-NEXT: [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[MISMATCH_END:%.*]] +; CHECK: mismatch_vec_loop_found: +; CHECK-NEXT: [[FIRST2:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ] +; CHECK-NEXT: [[MISMATCH_VECTOR_INDEX3:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ] +; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[FIRST2]] to i64 +; CHECK-NEXT: [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX3]], 
[[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +; CHECK-NEXT: br label [[MISMATCH_END]] +; CHECK: mismatch_loop_pre: +; CHECK-NEXT: br label [[MISMATCH_LOOP:%.*]] +; CHECK: mismatch_loop: +; CHECK-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ] +; CHECK-NEXT: [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]] +; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1 +; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]] +; CHECK-NEXT: br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]] +; CHECK: mismatch_loop_inc: +; CHECK-NEXT: [[TMP35]] = add i32 [[MISMATCH_INDEX]], 1 +; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]] +; CHECK-NEXT: br i1 [[TMP36]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]] +; CHECK: mismatch_end: +; CHECK-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ] +; CHECK-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]] +; CHECK: while.cond: +; CHECK-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ] +; CHECK-NEXT: [[INC:%.*]] = add i32 [[LEN_ADDR]], 1 +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]] +; CHECK: while.body: +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: 
[[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP37]], [[TMP38]] +; CHECK-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END_LOOPEXIT]] +; CHECK: byte.compare: +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT]] +; CHECK: while.end.loopexit: +; CHECK-NEXT: [[INC_LCSSA1:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ] +; CHECK-NEXT: br label [[WHILE_END]] +; CHECK: while.end: +; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[INC_LCSSA1]], [[WHILE_END_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[INC_LCSSA]] +; +; LMUL8-LABEL: define i32 @compare_bytes_extra_cmp( +; LMUL8-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]], i32 [[X:%.*]]) #[[ATTR0]] { +; LMUL8-NEXT: entry: +; LMUL8-NEXT: [[CMP_X:%.*]] = icmp ult i32 [[N]], [[X]] +; LMUL8-NEXT: br i1 [[CMP_X]], label [[PH:%.*]], label [[WHILE_END:%.*]] +; LMUL8: ph: +; LMUL8-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1 +; LMUL8-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]] +; LMUL8: mismatch_min_it_check: +; LMUL8-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; LMUL8-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64 +; LMUL8-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]] +; LMUL8-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]] +; LMUL8: mismatch_mem_check: +; LMUL8-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]] +; LMUL8-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; LMUL8-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; LMUL8-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64 +; LMUL8-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] +; LMUL8-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; LMUL8-NEXT: [[TMP11:%.*]] = ptrtoint ptr 
[[TMP10]] to i64 +; LMUL8-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; LMUL8-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP7]], 12 +; LMUL8-NEXT: [[TMP9:%.*]] = lshr i64 [[TMP11]], 12 +; LMUL8-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP8]], 12 +; LMUL8-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP14]], 12 +; LMUL8-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]] +; LMUL8-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]] +; LMUL8-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] +; LMUL8-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP_PREHEADER:%.*]], !prof [[PROF1]] +; LMUL8: mismatch_vec_loop_preheader: +; LMUL8-NEXT: br label [[MISMATCH_VECTOR_LOOP:%.*]] +; LMUL8: mismatch_vec_loop: +; LMUL8-NEXT: [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VECTOR_LOOP_PREHEADER]] ], [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ] +; LMUL8-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] +; LMUL8-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 64, i1 true) +; LMUL8-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] +; LMUL8-NEXT: [[LHS_LOAD:%.*]] = call @llvm.vp.load.nxv64i8.p0(ptr [[TMP20]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] +; LMUL8-NEXT: [[RHS_LOAD:%.*]] = call @llvm.vp.load.nxv64i8.p0(ptr [[TMP21]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[MISMATCH_CMP:%.*]] = call @llvm.vp.icmp.nxv64i8( [[LHS_LOAD]], [[RHS_LOAD]], metadata !"ne", shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv64i1( [[MISMATCH_CMP]], i1 false, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, 
zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] +; LMUL8-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] +; LMUL8: mismatch_vec_loop_inc: +; LMUL8-NEXT: [[TMP23:%.*]] = zext i32 [[TMP19]] to i64 +; LMUL8-NEXT: [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]] +; LMUL8-NEXT: [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]] +; LMUL8-NEXT: br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[MISMATCH_END:%.*]] +; LMUL8: mismatch_vec_loop_found: +; LMUL8-NEXT: [[FIRST2:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ] +; LMUL8-NEXT: [[MISMATCH_VECTOR_INDEX3:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ] +; LMUL8-NEXT: [[TMP26:%.*]] = zext i32 [[FIRST2]] to i64 +; LMUL8-NEXT: [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX3]], [[TMP26]] +; LMUL8-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +; LMUL8-NEXT: br label [[MISMATCH_END]] +; LMUL8: mismatch_loop_pre: +; LMUL8-NEXT: br label [[MISMATCH_LOOP:%.*]] +; LMUL8: mismatch_loop: +; LMUL8-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ] +; LMUL8-NEXT: [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64 +; LMUL8-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]] +; LMUL8-NEXT: [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1 +; LMUL8-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]] +; LMUL8-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1 +; LMUL8-NEXT: [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]] +; LMUL8-NEXT: br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]] +; LMUL8: mismatch_loop_inc: +; LMUL8-NEXT: [[TMP35]] = add i32 [[MISMATCH_INDEX]], 1 +; LMUL8-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]] +; LMUL8-NEXT: br i1 [[TMP36]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]] +; LMUL8: mismatch_end: +; LMUL8-NEXT: 
[[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ] +; LMUL8-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]] +; LMUL8: while.cond: +; LMUL8-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ] +; LMUL8-NEXT: [[INC:%.*]] = add i32 [[LEN_ADDR]], 1 +; LMUL8-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]] +; LMUL8-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]] +; LMUL8: while.body: +; LMUL8-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64 +; LMUL8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; LMUL8-NEXT: [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; LMUL8-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; LMUL8-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; LMUL8-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP37]], [[TMP38]] +; LMUL8-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END_LOOPEXIT]] +; LMUL8: byte.compare: +; LMUL8-NEXT: br label [[WHILE_END_LOOPEXIT]] +; LMUL8: while.end.loopexit: +; LMUL8-NEXT: [[INC_LCSSA1:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ] +; LMUL8-NEXT: br label [[WHILE_END]] +; LMUL8: while.end: +; LMUL8-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[INC_LCSSA1]], [[WHILE_END_LOOPEXIT]] ] +; LMUL8-NEXT: ret i32 [[INC_LCSSA]] +; +; LOOP-DEL-LABEL: define i32 @compare_bytes_extra_cmp( +; LOOP-DEL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]], i32 [[X:%.*]]) #[[ATTR0]] { +; LOOP-DEL-NEXT: entry: +; LOOP-DEL-NEXT: [[CMP_X:%.*]] = icmp ult i32 [[N]], [[X]] +; LOOP-DEL-NEXT: br i1 [[CMP_X]], label [[PH:%.*]], label [[WHILE_END:%.*]] +; 
LOOP-DEL: ph: +; LOOP-DEL-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1 +; LOOP-DEL-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; LOOP-DEL-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64 +; LOOP-DEL-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]] +; LOOP-DEL-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]] +; LOOP-DEL: mismatch_mem_check: +; LOOP-DEL-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]] +; LOOP-DEL-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; LOOP-DEL-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; LOOP-DEL-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64 +; LOOP-DEL-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] +; LOOP-DEL-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; LOOP-DEL-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 +; LOOP-DEL-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; LOOP-DEL-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP7]], 12 +; LOOP-DEL-NEXT: [[TMP9:%.*]] = lshr i64 [[TMP11]], 12 +; LOOP-DEL-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP8]], 12 +; LOOP-DEL-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP14]], 12 +; LOOP-DEL-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP6]], [[TMP9]] +; LOOP-DEL-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP12]], [[TMP15]] +; LOOP-DEL-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] +; LOOP-DEL-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP:%.*]], !prof [[PROF1]] +; LOOP-DEL: mismatch_vec_loop: +; LOOP-DEL-NEXT: [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ [[TMP24:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ], [ [[TMP1]], [[MISMATCH_MEM_CHECK]] ] +; LOOP-DEL-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] +; LOOP-DEL-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) +; LOOP-DEL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] +; LOOP-DEL-NEXT: [[LHS_LOAD:%.*]] = call 
@llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] +; LOOP-DEL-NEXT: [[RHS_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[MISMATCH_CMP:%.*]] = call @llvm.vp.icmp.nxv16i8( [[LHS_LOAD]], [[RHS_LOAD]], metadata !"ne", shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1( [[MISMATCH_CMP]], i1 false, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] +; LOOP-DEL-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] +; LOOP-DEL: mismatch_vec_loop_inc: +; LOOP-DEL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP19]] to i64 +; LOOP-DEL-NEXT: [[TMP24]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP23]] +; LOOP-DEL-NEXT: [[TMP25:%.*]] = icmp ne i64 [[TMP24]], [[TMP2]] +; LOOP-DEL-NEXT: br i1 [[TMP25]], label [[MISMATCH_VECTOR_LOOP]], label [[WHILE_END]] +; LOOP-DEL: mismatch_vec_loop_found: +; LOOP-DEL-NEXT: [[FIRST2:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ] +; LOOP-DEL-NEXT: [[MISMATCH_VECTOR_INDEX3:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ] +; LOOP-DEL-NEXT: [[TMP26:%.*]] = zext i32 [[FIRST2]] to i64 +; LOOP-DEL-NEXT: [[TMP27:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX3]], [[TMP26]] +; LOOP-DEL-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +; LOOP-DEL-NEXT: br label [[WHILE_END]] +; LOOP-DEL: mismatch_loop_pre: +; LOOP-DEL-NEXT: br label [[MISMATCH_LOOP:%.*]] +; LOOP-DEL: mismatch_loop: +; LOOP-DEL-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ 
[[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP35:%.*]], [[MISMATCH_LOOP_INC:%.*]] ] +; LOOP-DEL-NEXT: [[TMP29:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64 +; LOOP-DEL-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP29]] +; LOOP-DEL-NEXT: [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1 +; LOOP-DEL-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP29]] +; LOOP-DEL-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1 +; LOOP-DEL-NEXT: [[TMP34:%.*]] = icmp eq i8 [[TMP31]], [[TMP33]] +; LOOP-DEL-NEXT: br i1 [[TMP34]], label [[MISMATCH_LOOP_INC]], label [[WHILE_END]] +; LOOP-DEL: mismatch_loop_inc: +; LOOP-DEL-NEXT: [[TMP35]] = add i32 [[MISMATCH_INDEX]], 1 +; LOOP-DEL-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP35]], [[N]] +; LOOP-DEL-NEXT: br i1 [[TMP36]], label [[WHILE_END]], label [[MISMATCH_LOOP]] +; LOOP-DEL: while.end: +; LOOP-DEL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP28]], [[MISMATCH_VECTOR_LOOP_FOUND]] ] +; LOOP-DEL-NEXT: ret i32 [[INC_LCSSA]] +; +; MASKED-LABEL: define i32 @compare_bytes_extra_cmp( +; MASKED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]], i32 [[X:%.*]]) #[[ATTR0]] { +; MASKED-NEXT: entry: +; MASKED-NEXT: [[CMP_X:%.*]] = icmp ult i32 [[N]], [[X]] +; MASKED-NEXT: br i1 [[CMP_X]], label [[PH:%.*]], label [[WHILE_END:%.*]] +; MASKED: ph: +; MASKED-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], 1 +; MASKED-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]] +; MASKED: mismatch_min_it_check: +; MASKED-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; MASKED-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64 +; MASKED-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP0]], [[N]] +; MASKED-NEXT: br i1 [[TMP3]], label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]] +; MASKED: mismatch_mem_check: +; MASKED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]] 
+; MASKED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; MASKED-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; MASKED-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP4]] to i64 +; MASKED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] +; MASKED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; MASKED-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; MASKED-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; MASKED-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP7]], 12 +; MASKED-NEXT: [[TMP13:%.*]] = lshr i64 [[TMP10]], 12 +; MASKED-NEXT: [[TMP14:%.*]] = lshr i64 [[TMP6]], 12 +; MASKED-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP11]], 12 +; MASKED-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP12]], [[TMP13]] +; MASKED-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]] +; MASKED-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] +; MASKED-NEXT: br i1 [[TMP18]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VEC_LOOP_PREHEADER:%.*]], !prof [[PROF1]] +; MASKED: mismatch_vec_loop_preheader: +; MASKED-NEXT: [[TMP19:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[TMP2]]) +; MASKED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; MASKED-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP20]], 16 +; MASKED-NEXT: br label [[MISMATCH_VEC_LOOP:%.*]] +; MASKED: mismatch_vec_loop: +; MASKED-NEXT: [[MISMATCH_VEC_LOOP_PRED:%.*]] = phi [ [[TMP19]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP30:%.*]], [[MISMATCH_VEC_LOOP_INC:%.*]] ] +; MASKED-NEXT: [[MISMATCH_VEC_INDEX:%.*]] = phi i64 [ [[TMP1]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP29:%.*]], [[MISMATCH_VEC_LOOP_INC]] ] +; MASKED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VEC_INDEX]] +; MASKED-NEXT: [[TMP23:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP22]], i32 1, [[MISMATCH_VEC_LOOP_PRED]], zeroinitializer) +; MASKED-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VEC_INDEX]] +; MASKED-NEXT: [[TMP25:%.*]] = call 
@llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, [[MISMATCH_VEC_LOOP_PRED]], zeroinitializer) +; MASKED-NEXT: [[TMP26:%.*]] = icmp ne [[TMP23]], [[TMP25]] +; MASKED-NEXT: [[TMP27:%.*]] = select [[MISMATCH_VEC_LOOP_PRED]], [[TMP26]], zeroinitializer +; MASKED-NEXT: [[TMP28:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP27]]) +; MASKED-NEXT: br i1 [[TMP28]], label [[MISMATCH_VEC_LOOP_FOUND:%.*]], label [[MISMATCH_VEC_LOOP_INC]] +; MASKED: mismatch_vec_loop_inc: +; MASKED-NEXT: [[TMP29]] = add nuw nsw i64 [[MISMATCH_VEC_INDEX]], [[TMP21]] +; MASKED-NEXT: [[TMP30]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP29]], i64 [[TMP2]]) +; MASKED-NEXT: [[TMP31:%.*]] = extractelement [[TMP30]], i64 0 +; MASKED-NEXT: br i1 [[TMP31]], label [[MISMATCH_VEC_LOOP]], label [[MISMATCH_END:%.*]] +; MASKED: mismatch_vec_loop_found: +; MASKED-NEXT: [[MISMATCH_VEC_FOUND_PRED:%.*]] = phi [ [[TMP27]], [[MISMATCH_VEC_LOOP]] ] +; MASKED-NEXT: [[MISMATCH_VEC_LAST_LOOP_PRED:%.*]] = phi [ [[MISMATCH_VEC_LOOP_PRED]], [[MISMATCH_VEC_LOOP]] ] +; MASKED-NEXT: [[MISMATCH_VEC_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_VEC_INDEX]], [[MISMATCH_VEC_LOOP]] ] +; MASKED-NEXT: [[TMP32:%.*]] = and [[MISMATCH_VEC_LAST_LOOP_PRED]], [[MISMATCH_VEC_FOUND_PRED]] +; MASKED-NEXT: [[TMP33:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1( [[TMP32]], i1 true) +; MASKED-NEXT: [[TMP34:%.*]] = zext i32 [[TMP33]] to i64 +; MASKED-NEXT: [[TMP35:%.*]] = add nuw nsw i64 [[MISMATCH_VEC_FOUND_INDEX]], [[TMP34]] +; MASKED-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32 +; MASKED-NEXT: br label [[MISMATCH_END]] +; MASKED: mismatch_loop_pre: +; MASKED-NEXT: br label [[MISMATCH_LOOP:%.*]] +; MASKED: mismatch_loop: +; MASKED-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ [[TMP0]], [[MISMATCH_LOOP_PRE]] ], [ [[TMP43:%.*]], [[MISMATCH_LOOP_INC:%.*]] ] +; MASKED-NEXT: [[TMP37:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64 +; MASKED-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP37]] +; MASKED-NEXT: 
[[TMP39:%.*]] = load i8, ptr [[TMP38]], align 1 +; MASKED-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP37]] +; MASKED-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1 +; MASKED-NEXT: [[TMP42:%.*]] = icmp eq i8 [[TMP39]], [[TMP41]] +; MASKED-NEXT: br i1 [[TMP42]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]] +; MASKED: mismatch_loop_inc: +; MASKED-NEXT: [[TMP43]] = add i32 [[MISMATCH_INDEX]], 1 +; MASKED-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], [[N]] +; MASKED-NEXT: br i1 [[TMP44]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]] +; MASKED: mismatch_end: +; MASKED-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ [[N]], [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ [[N]], [[MISMATCH_VEC_LOOP_INC]] ], [ [[TMP36]], [[MISMATCH_VEC_LOOP_FOUND]] ] +; MASKED-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]] +; MASKED: while.cond: +; MASKED-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ] +; MASKED-NEXT: [[INC:%.*]] = add i32 [[LEN_ADDR]], 1 +; MASKED-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]] +; MASKED-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]] +; MASKED: while.body: +; MASKED-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64 +; MASKED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; MASKED-NEXT: [[TMP45:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MASKED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; MASKED-NEXT: [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; MASKED-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP45]], [[TMP46]] +; MASKED-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END_LOOPEXIT]] +; MASKED: byte.compare: +; MASKED-NEXT: br label [[WHILE_END_LOOPEXIT]] +; MASKED: while.end.loopexit: +; MASKED-NEXT: [[INC_LCSSA1:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ 
[[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ] +; MASKED-NEXT: br label [[WHILE_END]] +; MASKED: while.end: +; MASKED-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[INC_LCSSA1]], [[WHILE_END_LOOPEXIT]] ] +; MASKED-NEXT: ret i32 [[INC_LCSSA]] +; +; NO-TRANSFORM-LABEL: define i32 @compare_bytes_extra_cmp( +; NO-TRANSFORM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]], i32 [[X:%.*]]) { +; NO-TRANSFORM-NEXT: entry: +; NO-TRANSFORM-NEXT: [[CMP_X:%.*]] = icmp ult i32 [[N]], [[X]] +; NO-TRANSFORM-NEXT: br i1 [[CMP_X]], label [[PH:%.*]], label [[WHILE_END:%.*]] +; NO-TRANSFORM: ph: +; NO-TRANSFORM-NEXT: br label [[WHILE_COND:%.*]] +; NO-TRANSFORM: while.cond: +; NO-TRANSFORM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[PH]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ] +; NO-TRANSFORM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1 +; NO-TRANSFORM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]] +; NO-TRANSFORM: while.body: +; NO-TRANSFORM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64 +; NO-TRANSFORM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; NO-TRANSFORM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; NO-TRANSFORM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; NO-TRANSFORM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; NO-TRANSFORM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]] +; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; NO-TRANSFORM: while.end: +; NO-TRANSFORM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ], [ [[X]], [[ENTRY:%.*]] ] +; NO-TRANSFORM-NEXT: ret i32 [[INC_LCSSA]] +entry: + %cmp.x = icmp ult i32 %n, %x + br i1 %cmp.x, label %ph, label %while.end + +ph: + br label %while.cond + +while.cond: + %len.addr = phi i32 [ %len, %ph ], [ 
%inc, %while.body ] + %inc = add i32 %len.addr, 1 + %cmp.not = icmp eq i32 %inc, %n + br i1 %cmp.not, label %while.end, label %while.body + +while.body: + %idxprom = zext i32 %inc to i64 + %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom + %0 = load i8, ptr %arrayidx + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom + %1 = load i8, ptr %arrayidx2 + %cmp.not2 = icmp eq i8 %0, %1 + br i1 %cmp.not2, label %while.cond, label %while.end + +while.end: + %inc.lcssa = phi i32 [ %inc, %while.body ], [ %inc, %while.cond ], [ %x, %entry ] + ret i32 %inc.lcssa +} + +define void @compare_bytes_cleanup_block(ptr %src1, ptr %src2) { +; CHECK-LABEL: define void @compare_bytes_cleanup_block( +; CHECK-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]] +; CHECK: mismatch_min_it_check: +; CHECK-NEXT: br i1 false, label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]] +; CHECK: mismatch_mem_check: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[SRC1]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[SRC2]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP1]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP0]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[SRC1]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SRC2]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP10]], 12 +; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP4]], 12 +; CHECK-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP9]], 12 +; CHECK-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP7]], 12 +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[TMP8]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MISMATCH_LOOP_PRE]], label 
[[MISMATCH_VECTOR_LOOP_PREHEADER:%.*]], !prof [[PROF1]] +; CHECK: mismatch_vec_loop_preheader: +; CHECK-NEXT: br label [[MISMATCH_VECTOR_LOOP:%.*]] +; CHECK: mismatch_vec_loop: +; CHECK-NEXT: [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ 1, [[MISMATCH_VECTOR_LOOP_PREHEADER]] ], [ [[TMP20:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ] +; CHECK-NEXT: [[AVL:%.*]] = sub nuw nsw i64 0, [[MISMATCH_VECTOR_INDEX]] +; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[MISMATCH_VECTOR_INDEX]] +; CHECK-NEXT: [[LHS_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr [[TMP16]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP15]]) +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[MISMATCH_VECTOR_INDEX]] +; CHECK-NEXT: [[RHS_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr [[TMP17]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP15]]) +; CHECK-NEXT: [[MISMATCH_CMP:%.*]] = call @llvm.vp.icmp.nxv16i8( [[LHS_LOAD]], [[RHS_LOAD]], metadata !"ne", shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP15]]) +; CHECK-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1( [[MISMATCH_CMP]], i1 false, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP15]]) +; CHECK-NEXT: [[TMP18:%.*]] = icmp ne i32 [[FIRST]], [[TMP15]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] +; CHECK: mismatch_vec_loop_inc: +; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP15]] to i64 +; CHECK-NEXT: [[TMP20]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i64 [[TMP20]], 0 +; CHECK-NEXT: br i1 [[TMP21]], label [[MISMATCH_VECTOR_LOOP]], label [[MISMATCH_END:%.*]] +; CHECK: mismatch_vec_loop_found: +; CHECK-NEXT: 
[[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ] +; CHECK-NEXT: [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ] +; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[FIRST1]] to i64 +; CHECK-NEXT: [[TMP23:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32 +; CHECK-NEXT: br label [[MISMATCH_END]] +; CHECK: mismatch_loop_pre: +; CHECK-NEXT: br label [[MISMATCH_LOOP:%.*]] +; CHECK: mismatch_loop: +; CHECK-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ 1, [[MISMATCH_LOOP_PRE]] ], [ [[TMP31:%.*]], [[MISMATCH_LOOP_INC:%.*]] ] +; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = load i8, ptr [[TMP26]], align 1 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[TMP25]] +; CHECK-NEXT: [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1 +; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i8 [[TMP27]], [[TMP29]] +; CHECK-NEXT: br i1 [[TMP30]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]] +; CHECK: mismatch_loop_inc: +; CHECK-NEXT: [[TMP31]] = add i32 [[MISMATCH_INDEX]], 1 +; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[TMP31]], 0 +; CHECK-NEXT: br i1 [[TMP32]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]] +; CHECK: mismatch_end: +; CHECK-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ 0, [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ 0, [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP24]], [[MISMATCH_VECTOR_LOOP_FOUND]] ] +; CHECK-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]] +; CHECK: while.cond: +; CHECK-NEXT: [[LEN:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ], [ 0, [[MISMATCH_END]] ] +; CHECK-NEXT: [[INC:%.*]] = add i32 [[LEN]], 1 +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], 0 +; CHECK-NEXT: br i1 [[CMP_NOT]], label [[CLEANUP_THREAD:%.*]], label [[WHILE_BODY]] +; 
CHECK: while.body: +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP34:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP33]], [[TMP34]] +; CHECK-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[IF_END:%.*]] +; CHECK: byte.compare: +; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], 0 +; CHECK-NEXT: br i1 [[TMP35]], label [[CLEANUP_THREAD]], label [[IF_END]] +; CHECK: cleanup.thread: +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ] +; CHECK-NEXT: ret void +; +; LMUL8-LABEL: define void @compare_bytes_cleanup_block( +; LMUL8-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) #[[ATTR0]] { +; LMUL8-NEXT: entry: +; LMUL8-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]] +; LMUL8: mismatch_min_it_check: +; LMUL8-NEXT: br i1 false, label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]] +; LMUL8: mismatch_mem_check: +; LMUL8-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[SRC1]], i64 1 +; LMUL8-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[SRC2]], i64 1 +; LMUL8-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP1]] to i64 +; LMUL8-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP0]] to i64 +; LMUL8-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[SRC1]], i64 0 +; LMUL8-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SRC2]], i64 0 +; LMUL8-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64 +; LMUL8-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64 +; LMUL8-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP10]], 12 +; LMUL8-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP4]], 12 +; LMUL8-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP9]], 12 +; LMUL8-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP7]], 
12 +; LMUL8-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP2]], [[TMP5]] +; LMUL8-NEXT: [[TMP13:%.*]] = icmp ne i64 [[TMP8]], [[TMP11]] +; LMUL8-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]] +; LMUL8-NEXT: br i1 [[TMP14]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VECTOR_LOOP_PREHEADER:%.*]], !prof [[PROF1]] +; LMUL8: mismatch_vec_loop_preheader: +; LMUL8-NEXT: br label [[MISMATCH_VECTOR_LOOP:%.*]] +; LMUL8: mismatch_vec_loop: +; LMUL8-NEXT: [[MISMATCH_VECTOR_INDEX:%.*]] = phi i64 [ 1, [[MISMATCH_VECTOR_LOOP_PREHEADER]] ], [ [[TMP20:%.*]], [[MISMATCH_VECTOR_LOOP_INC:%.*]] ] +; LMUL8-NEXT: [[AVL:%.*]] = sub nuw nsw i64 0, [[MISMATCH_VECTOR_INDEX]] +; LMUL8-NEXT: [[TMP15:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 64, i1 true) +; LMUL8-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[MISMATCH_VECTOR_INDEX]] +; LMUL8-NEXT: [[LHS_LOAD:%.*]] = call @llvm.vp.load.nxv64i8.p0(ptr [[TMP16]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP15]]) +; LMUL8-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[MISMATCH_VECTOR_INDEX]] +; LMUL8-NEXT: [[RHS_LOAD:%.*]] = call @llvm.vp.load.nxv64i8.p0(ptr [[TMP17]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP15]]) +; LMUL8-NEXT: [[MISMATCH_CMP:%.*]] = call @llvm.vp.icmp.nxv64i8( [[LHS_LOAD]], [[RHS_LOAD]], metadata !"ne", shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP15]]) +; LMUL8-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv64i1( [[MISMATCH_CMP]], i1 false, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP15]]) +; LMUL8-NEXT: [[TMP18:%.*]] = icmp ne i32 [[FIRST]], [[TMP15]] +; LMUL8-NEXT: br i1 [[TMP18]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] +; LMUL8: mismatch_vec_loop_inc: +; LMUL8-NEXT: [[TMP19:%.*]] = zext i32 [[TMP15]] to i64 +; LMUL8-NEXT: [[TMP20]] 
= add nuw nsw i64 [[MISMATCH_VECTOR_INDEX]], [[TMP19]] +; LMUL8-NEXT: [[TMP21:%.*]] = icmp ne i64 [[TMP20]], 0 +; LMUL8-NEXT: br i1 [[TMP21]], label [[MISMATCH_VECTOR_LOOP]], label [[MISMATCH_END:%.*]] +; LMUL8: mismatch_vec_loop_found: +; LMUL8-NEXT: [[FIRST1:%.*]] = phi i32 [ [[FIRST]], [[MISMATCH_VECTOR_LOOP]] ] +; LMUL8-NEXT: [[MISMATCH_VECTOR_INDEX2:%.*]] = phi i64 [ [[MISMATCH_VECTOR_INDEX]], [[MISMATCH_VECTOR_LOOP]] ] +; LMUL8-NEXT: [[TMP22:%.*]] = zext i32 [[FIRST1]] to i64 +; LMUL8-NEXT: [[TMP23:%.*]] = add nuw nsw i64 [[MISMATCH_VECTOR_INDEX2]], [[TMP22]] +; LMUL8-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32 +; LMUL8-NEXT: br label [[MISMATCH_END]] +; LMUL8: mismatch_loop_pre: +; LMUL8-NEXT: br label [[MISMATCH_LOOP:%.*]] +; LMUL8: mismatch_loop: +; LMUL8-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ 1, [[MISMATCH_LOOP_PRE]] ], [ [[TMP31:%.*]], [[MISMATCH_LOOP_INC:%.*]] ] +; LMUL8-NEXT: [[TMP25:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64 +; LMUL8-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[TMP25]] +; LMUL8-NEXT: [[TMP27:%.*]] = load i8, ptr [[TMP26]], align 1 +; LMUL8-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[TMP25]] +; LMUL8-NEXT: [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1 +; LMUL8-NEXT: [[TMP30:%.*]] = icmp eq i8 [[TMP27]], [[TMP29]] +; LMUL8-NEXT: br i1 [[TMP30]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]] +; LMUL8: mismatch_loop_inc: +; LMUL8-NEXT: [[TMP31]] = add i32 [[MISMATCH_INDEX]], 1 +; LMUL8-NEXT: [[TMP32:%.*]] = icmp eq i32 [[TMP31]], 0 +; LMUL8-NEXT: br i1 [[TMP32]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]] +; LMUL8: mismatch_end: +; LMUL8-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ 0, [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ 0, [[MISMATCH_VECTOR_LOOP_INC]] ], [ [[TMP24]], [[MISMATCH_VECTOR_LOOP_FOUND]] ] +; LMUL8-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]] +; LMUL8: while.cond: +; LMUL8-NEXT: [[LEN:%.*]] = phi i32 [ [[MISMATCH_RESULT]], 
[[WHILE_BODY:%.*]] ], [ 0, [[MISMATCH_END]] ] +; LMUL8-NEXT: [[INC:%.*]] = add i32 [[LEN]], 1 +; LMUL8-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], 0 +; LMUL8-NEXT: br i1 [[CMP_NOT]], label [[CLEANUP_THREAD:%.*]], label [[WHILE_BODY]] +; LMUL8: while.body: +; LMUL8-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64 +; LMUL8-NEXT: [[ARRAYIDX:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[IDXPROM]] +; LMUL8-NEXT: [[TMP33:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; LMUL8-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[IDXPROM]] +; LMUL8-NEXT: [[TMP34:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; LMUL8-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP33]], [[TMP34]] +; LMUL8-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[IF_END:%.*]] +; LMUL8: byte.compare: +; LMUL8-NEXT: [[TMP35:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], 0 +; LMUL8-NEXT: br i1 [[TMP35]], label [[CLEANUP_THREAD]], label [[IF_END]] +; LMUL8: cleanup.thread: +; LMUL8-NEXT: ret void +; LMUL8: if.end: +; LMUL8-NEXT: [[RES:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ] +; LMUL8-NEXT: ret void +; +; LOOP-DEL-LABEL: define void @compare_bytes_cleanup_block( +; LOOP-DEL-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) #[[ATTR0]] { +; LOOP-DEL-NEXT: entry: +; LOOP-DEL-NEXT: br label [[MISMATCH_LOOP:%.*]] +; LOOP-DEL: mismatch_loop: +; LOOP-DEL-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP6:%.*]], [[MISMATCH_LOOP]] ] +; LOOP-DEL-NEXT: [[TMP0:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64 +; LOOP-DEL-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[TMP0]] +; LOOP-DEL-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +; LOOP-DEL-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[TMP0]] +; LOOP-DEL-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1 +; LOOP-DEL-NEXT: [[TMP5:%.*]] = icmp ne i8 [[TMP2]], [[TMP4]] +; LOOP-DEL-NEXT: [[TMP6]] = add i32 [[MISMATCH_INDEX]], 1 +; LOOP-DEL-NEXT: 
[[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +; LOOP-DEL-NEXT: [[OR_COND:%.*]] = or i1 [[TMP5]], [[TMP7]] +; LOOP-DEL-NEXT: br i1 [[OR_COND]], label [[COMMON_RET:%.*]], label [[MISMATCH_LOOP]] +; LOOP-DEL: common.ret: +; LOOP-DEL-NEXT: ret void +; +; MASKED-LABEL: define void @compare_bytes_cleanup_block( +; MASKED-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) #[[ATTR0]] { +; MASKED-NEXT: entry: +; MASKED-NEXT: br label [[MISMATCH_MIN_IT_CHECK:%.*]] +; MASKED: mismatch_min_it_check: +; MASKED-NEXT: br i1 false, label [[MISMATCH_MEM_CHECK:%.*]], label [[MISMATCH_LOOP_PRE:%.*]], !prof [[PROF0]] +; MASKED: mismatch_mem_check: +; MASKED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[SRC1]], i64 1 +; MASKED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[SRC2]], i64 1 +; MASKED-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 +; MASKED-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP0]] to i64 +; MASKED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC1]], i64 0 +; MASKED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC2]], i64 0 +; MASKED-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP4]] to i64 +; MASKED-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; MASKED-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP3]], 12 +; MASKED-NEXT: [[TMP9:%.*]] = lshr i64 [[TMP6]], 12 +; MASKED-NEXT: [[TMP10:%.*]] = lshr i64 [[TMP2]], 12 +; MASKED-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP7]], 12 +; MASKED-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP8]], [[TMP9]] +; MASKED-NEXT: [[TMP13:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]] +; MASKED-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]] +; MASKED-NEXT: br i1 [[TMP14]], label [[MISMATCH_LOOP_PRE]], label [[MISMATCH_VEC_LOOP_PREHEADER:%.*]], !prof [[PROF1]] +; MASKED: mismatch_vec_loop_preheader: +; MASKED-NEXT: [[TMP15:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 1, i64 0) +; MASKED-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() +; MASKED-NEXT: [[TMP17:%.*]] = mul nuw nsw i64 [[TMP16]], 16 +; MASKED-NEXT: br label [[MISMATCH_VEC_LOOP:%.*]] +; MASKED: mismatch_vec_loop: +; 
MASKED-NEXT: [[MISMATCH_VEC_LOOP_PRED:%.*]] = phi [ [[TMP15]], [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP26:%.*]], [[MISMATCH_VEC_LOOP_INC:%.*]] ] +; MASKED-NEXT: [[MISMATCH_VEC_INDEX:%.*]] = phi i64 [ 1, [[MISMATCH_VEC_LOOP_PREHEADER]] ], [ [[TMP25:%.*]], [[MISMATCH_VEC_LOOP_INC]] ] +; MASKED-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[MISMATCH_VEC_INDEX]] +; MASKED-NEXT: [[TMP19:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP18]], i32 1, [[MISMATCH_VEC_LOOP_PRED]], zeroinitializer) +; MASKED-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[MISMATCH_VEC_INDEX]] +; MASKED-NEXT: [[TMP21:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP20]], i32 1, [[MISMATCH_VEC_LOOP_PRED]], zeroinitializer) +; MASKED-NEXT: [[TMP22:%.*]] = icmp ne [[TMP19]], [[TMP21]] +; MASKED-NEXT: [[TMP23:%.*]] = select [[MISMATCH_VEC_LOOP_PRED]], [[TMP22]], zeroinitializer +; MASKED-NEXT: [[TMP24:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP23]]) +; MASKED-NEXT: br i1 [[TMP24]], label [[MISMATCH_VEC_LOOP_FOUND:%.*]], label [[MISMATCH_VEC_LOOP_INC]] +; MASKED: mismatch_vec_loop_inc: +; MASKED-NEXT: [[TMP25]] = add nuw nsw i64 [[MISMATCH_VEC_INDEX]], [[TMP17]] +; MASKED-NEXT: [[TMP26]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP25]], i64 0) +; MASKED-NEXT: [[TMP27:%.*]] = extractelement [[TMP26]], i64 0 +; MASKED-NEXT: br i1 [[TMP27]], label [[MISMATCH_VEC_LOOP]], label [[MISMATCH_END:%.*]] +; MASKED: mismatch_vec_loop_found: +; MASKED-NEXT: [[MISMATCH_VEC_FOUND_PRED:%.*]] = phi [ [[TMP23]], [[MISMATCH_VEC_LOOP]] ] +; MASKED-NEXT: [[MISMATCH_VEC_LAST_LOOP_PRED:%.*]] = phi [ [[MISMATCH_VEC_LOOP_PRED]], [[MISMATCH_VEC_LOOP]] ] +; MASKED-NEXT: [[MISMATCH_VEC_FOUND_INDEX:%.*]] = phi i64 [ [[MISMATCH_VEC_INDEX]], [[MISMATCH_VEC_LOOP]] ] +; MASKED-NEXT: [[TMP28:%.*]] = and [[MISMATCH_VEC_LAST_LOOP_PRED]], [[MISMATCH_VEC_FOUND_PRED]] +; MASKED-NEXT: [[TMP29:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1( [[TMP28]], i1 true) +; MASKED-NEXT: 
[[TMP30:%.*]] = zext i32 [[TMP29]] to i64 +; MASKED-NEXT: [[TMP31:%.*]] = add nuw nsw i64 [[MISMATCH_VEC_FOUND_INDEX]], [[TMP30]] +; MASKED-NEXT: [[TMP32:%.*]] = trunc i64 [[TMP31]] to i32 +; MASKED-NEXT: br label [[MISMATCH_END]] +; MASKED: mismatch_loop_pre: +; MASKED-NEXT: br label [[MISMATCH_LOOP:%.*]] +; MASKED: mismatch_loop: +; MASKED-NEXT: [[MISMATCH_INDEX:%.*]] = phi i32 [ 1, [[MISMATCH_LOOP_PRE]] ], [ [[TMP39:%.*]], [[MISMATCH_LOOP_INC:%.*]] ] +; MASKED-NEXT: [[TMP33:%.*]] = zext i32 [[MISMATCH_INDEX]] to i64 +; MASKED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[TMP33]] +; MASKED-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1 +; MASKED-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[TMP33]] +; MASKED-NEXT: [[TMP37:%.*]] = load i8, ptr [[TMP36]], align 1 +; MASKED-NEXT: [[TMP38:%.*]] = icmp eq i8 [[TMP35]], [[TMP37]] +; MASKED-NEXT: br i1 [[TMP38]], label [[MISMATCH_LOOP_INC]], label [[MISMATCH_END]] +; MASKED: mismatch_loop_inc: +; MASKED-NEXT: [[TMP39]] = add i32 [[MISMATCH_INDEX]], 1 +; MASKED-NEXT: [[TMP40:%.*]] = icmp eq i32 [[TMP39]], 0 +; MASKED-NEXT: br i1 [[TMP40]], label [[MISMATCH_END]], label [[MISMATCH_LOOP]] +; MASKED: mismatch_end: +; MASKED-NEXT: [[MISMATCH_RESULT:%.*]] = phi i32 [ 0, [[MISMATCH_LOOP_INC]] ], [ [[MISMATCH_INDEX]], [[MISMATCH_LOOP]] ], [ 0, [[MISMATCH_VEC_LOOP_INC]] ], [ [[TMP32]], [[MISMATCH_VEC_LOOP_FOUND]] ] +; MASKED-NEXT: br i1 true, label [[BYTE_COMPARE:%.*]], label [[WHILE_COND:%.*]] +; MASKED: while.cond: +; MASKED-NEXT: [[LEN:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ], [ 0, [[MISMATCH_END]] ] +; MASKED-NEXT: [[INC:%.*]] = add i32 [[LEN]], 1 +; MASKED-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], 0 +; MASKED-NEXT: br i1 [[CMP_NOT]], label [[CLEANUP_THREAD:%.*]], label [[WHILE_BODY]] +; MASKED: while.body: +; MASKED-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64 +; MASKED-NEXT: [[ARRAYIDX:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[IDXPROM]] 
+; MASKED-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MASKED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[IDXPROM]] +; MASKED-NEXT: [[TMP42:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; MASKED-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP41]], [[TMP42]] +; MASKED-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[IF_END:%.*]] +; MASKED: byte.compare: +; MASKED-NEXT: [[TMP43:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], 0 +; MASKED-NEXT: br i1 [[TMP43]], label [[CLEANUP_THREAD]], label [[IF_END]] +; MASKED: cleanup.thread: +; MASKED-NEXT: ret void +; MASKED: if.end: +; MASKED-NEXT: [[RES:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ] +; MASKED-NEXT: ret void +; +; NO-TRANSFORM-LABEL: define void @compare_bytes_cleanup_block( +; NO-TRANSFORM-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) { +; NO-TRANSFORM-NEXT: entry: +; NO-TRANSFORM-NEXT: br label [[WHILE_COND:%.*]] +; NO-TRANSFORM: while.cond: +; NO-TRANSFORM-NEXT: [[LEN:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; NO-TRANSFORM-NEXT: [[INC]] = add i32 [[LEN]], 1 +; NO-TRANSFORM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], 0 +; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT]], label [[CLEANUP_THREAD:%.*]], label [[WHILE_BODY]] +; NO-TRANSFORM: while.body: +; NO-TRANSFORM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64 +; NO-TRANSFORM-NEXT: [[ARRAYIDX:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[IDXPROM]] +; NO-TRANSFORM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; NO-TRANSFORM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[IDXPROM]] +; NO-TRANSFORM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; NO-TRANSFORM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]] +; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[IF_END:%.*]] +; NO-TRANSFORM: cleanup.thread: +; NO-TRANSFORM-NEXT: ret void +; NO-TRANSFORM: if.end: +; NO-TRANSFORM-NEXT: [[RES:%.*]] = phi i32 
[ [[INC]], [[WHILE_BODY]] ] +; NO-TRANSFORM-NEXT: ret void +entry: + br label %while.cond + +while.cond: + %len = phi i32 [ %inc, %while.body ], [ 0, %entry ] + %inc = add i32 %len, 1 + %cmp.not = icmp eq i32 %inc, 0 + br i1 %cmp.not, label %cleanup.thread, label %while.body + +while.body: + %idxprom = zext i32 %inc to i64 + %arrayidx = getelementptr i8, ptr %src1, i64 %idxprom + %0 = load i8, ptr %arrayidx, align 1 + %arrayidx2 = getelementptr i8, ptr %src2, i64 %idxprom + %1 = load i8, ptr %arrayidx2, align 1 + %cmp.not2 = icmp eq i8 %0, %1 + br i1 %cmp.not2, label %while.cond, label %if.end + +cleanup.thread: + ret void + +if.end: + %res = phi i32 [ %inc, %while.body ] + ret void +} + +; +; NEGATIVE TESTS +; + +; Similar to @compare_bytes_simple, except in the while.end block we have an extra PHI +; with unique values for each incoming block from the loop. +define i32 @compare_bytes_simple2(ptr %a, ptr %b, ptr %c, ptr %d, i32 %len, i32 %n) { +; CHECK-LABEL: define i32 @compare_bytes_simple2( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_COND:%.*]] +; CHECK: while.cond: +; CHECK-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ] +; CHECK-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1 +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.body: +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]] +; CHECK-NEXT: br i1 [[CMP_NOT2]], 
label [[WHILE_COND]], label [[WHILE_END]] +; CHECK: while.end: +; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ] +; CHECK-NEXT: [[FINAL_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ], [ [[D]], [[WHILE_COND]] ] +; CHECK-NEXT: store i32 [[INC_LCSSA]], ptr [[FINAL_PTR]], align 4 +; CHECK-NEXT: ret i32 [[INC_LCSSA]] +; +; LMUL8-LABEL: define i32 @compare_bytes_simple2( +; LMUL8-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; LMUL8-NEXT: entry: +; LMUL8-NEXT: br label [[WHILE_COND:%.*]] +; LMUL8: while.cond: +; LMUL8-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ] +; LMUL8-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1 +; LMUL8-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; LMUL8-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; LMUL8: while.body: +; LMUL8-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64 +; LMUL8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; LMUL8-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; LMUL8-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; LMUL8-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; LMUL8-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]] +; LMUL8-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; LMUL8: while.end: +; LMUL8-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ] +; LMUL8-NEXT: [[FINAL_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ], [ [[D]], [[WHILE_COND]] ] +; LMUL8-NEXT: store i32 [[INC_LCSSA]], ptr [[FINAL_PTR]], align 4 +; LMUL8-NEXT: ret i32 [[INC_LCSSA]] +; +; LOOP-DEL-LABEL: define i32 @compare_bytes_simple2( +; LOOP-DEL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; LOOP-DEL-NEXT: entry: +; LOOP-DEL-NEXT: br label 
[[WHILE_COND:%.*]] +; LOOP-DEL: while.cond: +; LOOP-DEL-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ] +; LOOP-DEL-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1 +; LOOP-DEL-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; LOOP-DEL-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; LOOP-DEL: while.body: +; LOOP-DEL-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64 +; LOOP-DEL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; LOOP-DEL-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; LOOP-DEL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; LOOP-DEL-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; LOOP-DEL-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]] +; LOOP-DEL-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; LOOP-DEL: while.end: +; LOOP-DEL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ] +; LOOP-DEL-NEXT: [[FINAL_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ], [ [[D]], [[WHILE_COND]] ] +; LOOP-DEL-NEXT: store i32 [[INC_LCSSA]], ptr [[FINAL_PTR]], align 4 +; LOOP-DEL-NEXT: ret i32 [[INC_LCSSA]] +; +; MASKED-LABEL: define i32 @compare_bytes_simple2( +; MASKED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; MASKED-NEXT: entry: +; MASKED-NEXT: br label [[WHILE_COND:%.*]] +; MASKED: while.cond: +; MASKED-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ] +; MASKED-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1 +; MASKED-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; MASKED-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; MASKED: while.body: +; MASKED-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64 +; MASKED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; MASKED-NEXT: 
[[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MASKED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; MASKED-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; MASKED-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]] +; MASKED-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; MASKED: while.end: +; MASKED-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ] +; MASKED-NEXT: [[FINAL_PTR:%.*]] = phi ptr [ [[C]], [[WHILE_BODY]] ], [ [[D]], [[WHILE_COND]] ] +; MASKED-NEXT: store i32 [[INC_LCSSA]], ptr [[FINAL_PTR]], align 4 +; MASKED-NEXT: ret i32 [[INC_LCSSA]] +; +entry: + br label %while.cond + +while.cond: + %len.addr = phi i32 [ %len, %entry ], [ %inc, %while.body ] + %inc = add i32 %len.addr, 1 + %cmp.not = icmp eq i32 %inc, %n + br i1 %cmp.not, label %while.end, label %while.body + +while.body: + %idxprom = zext i32 %inc to i64 + %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom + %0 = load i8, ptr %arrayidx + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom + %1 = load i8, ptr %arrayidx2 + %cmp.not2 = icmp eq i8 %0, %1 + br i1 %cmp.not2, label %while.cond, label %while.end + +while.end: + %inc.lcssa = phi i32 [ %inc, %while.body ], [ %inc, %while.cond ] + %final_ptr = phi ptr [ %c, %while.body ], [ %d, %while.cond ] + store i32 %inc.lcssa, ptr %final_ptr + ret i32 %inc.lcssa +} + +define i32 @compare_bytes_simple3(ptr %a, ptr %b, ptr %c, i32 %d, i32 %len, i32 %n) { +; CHECK-LABEL: define i32 @compare_bytes_simple3( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_COND:%.*]] +; CHECK: while.cond: +; CHECK-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ] +; CHECK-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1 +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] 
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.body: +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]] +; CHECK-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; CHECK: while.end: +; CHECK-NEXT: [[FINAL_VAL:%.*]] = phi i32 [ [[D]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ] +; CHECK-NEXT: store i32 [[FINAL_VAL]], ptr [[C]], align 4 +; CHECK-NEXT: ret i32 [[FINAL_VAL]] +; +; LMUL8-LABEL: define i32 @compare_bytes_simple3( +; LMUL8-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; LMUL8-NEXT: entry: +; LMUL8-NEXT: br label [[WHILE_COND:%.*]] +; LMUL8: while.cond: +; LMUL8-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ] +; LMUL8-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1 +; LMUL8-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; LMUL8-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; LMUL8: while.body: +; LMUL8-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64 +; LMUL8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; LMUL8-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; LMUL8-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; LMUL8-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; LMUL8-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]] +; LMUL8-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; LMUL8: while.end: +; LMUL8-NEXT: [[FINAL_VAL:%.*]] = phi i32 [ [[D]], [[WHILE_BODY]] ], [ 
[[INC]], [[WHILE_COND]] ] +; LMUL8-NEXT: store i32 [[FINAL_VAL]], ptr [[C]], align 4 +; LMUL8-NEXT: ret i32 [[FINAL_VAL]] +; +; LOOP-DEL-LABEL: define i32 @compare_bytes_simple3( +; LOOP-DEL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; LOOP-DEL-NEXT: entry: +; LOOP-DEL-NEXT: br label [[WHILE_COND:%.*]] +; LOOP-DEL: while.cond: +; LOOP-DEL-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ] +; LOOP-DEL-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1 +; LOOP-DEL-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; LOOP-DEL-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; LOOP-DEL: while.body: +; LOOP-DEL-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64 +; LOOP-DEL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; LOOP-DEL-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; LOOP-DEL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; LOOP-DEL-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; LOOP-DEL-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]] +; LOOP-DEL-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; LOOP-DEL: while.end: +; LOOP-DEL-NEXT: [[FINAL_VAL:%.*]] = phi i32 [ [[D]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ] +; LOOP-DEL-NEXT: store i32 [[FINAL_VAL]], ptr [[C]], align 4 +; LOOP-DEL-NEXT: ret i32 [[FINAL_VAL]] +; +; MASKED-LABEL: define i32 @compare_bytes_simple3( +; MASKED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; MASKED-NEXT: entry: +; MASKED-NEXT: br label [[WHILE_COND:%.*]] +; MASKED: while.cond: +; MASKED-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ] +; MASKED-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1 +; MASKED-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; MASKED-NEXT: br i1 
[[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; MASKED: while.body: +; MASKED-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64 +; MASKED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; MASKED-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MASKED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; MASKED-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; MASKED-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]] +; MASKED-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; MASKED: while.end: +; MASKED-NEXT: [[FINAL_VAL:%.*]] = phi i32 [ [[D]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ] +; MASKED-NEXT: store i32 [[FINAL_VAL]], ptr [[C]], align 4 +; MASKED-NEXT: ret i32 [[FINAL_VAL]] +; + entry: + br label %while.cond + + while.cond: + %len.addr = phi i32 [ %len, %entry ], [ %inc, %while.body ] + %inc = add i32 %len.addr, 1 + %cmp.not = icmp eq i32 %inc, %n + br i1 %cmp.not, label %while.end, label %while.body + + while.body: + %idxprom = zext i32 %inc to i64 + %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom + %0 = load i8, ptr %arrayidx + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom + %1 = load i8, ptr %arrayidx2 + %cmp.not2 = icmp eq i8 %0, %1 + br i1 %cmp.not2, label %while.cond, label %while.end + + while.end: + %final_val = phi i32 [ %d, %while.body ], [ %inc, %while.cond ] + store i32 %final_val, ptr %c + ret i32 %final_val +} + +; Disable the optimization when noimplicitfloat is present. 
+define i32 @no_implicit_float(ptr %a, ptr %b, i32 %len, i32 %n) noimplicitfloat {
+; CHECK-LABEL: define i32 @no_implicit_float(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK: while.cond:
+; CHECK-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; CHECK-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; CHECK: while.body:
+; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; CHECK: while.end:
+; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; CHECK-NEXT: ret i32 [[INC_LCSSA]]
+;
+; LMUL8-LABEL: define i32 @no_implicit_float(
+; LMUL8-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
+; LMUL8-NEXT: entry:
+; LMUL8-NEXT: br label [[WHILE_COND:%.*]]
+; LMUL8: while.cond:
+; LMUL8-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; LMUL8-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; LMUL8-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; LMUL8-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; LMUL8: while.body:
+; LMUL8-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; LMUL8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; LMUL8-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; LMUL8-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; LMUL8-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; LMUL8-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; LMUL8-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; LMUL8: while.end:
+; LMUL8-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; LMUL8-NEXT: ret i32 [[INC_LCSSA]]
+;
+; LOOP-DEL-LABEL: define i32 @no_implicit_float(
+; LOOP-DEL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
+; LOOP-DEL-NEXT: entry:
+; LOOP-DEL-NEXT: br label [[WHILE_COND:%.*]]
+; LOOP-DEL: while.cond:
+; LOOP-DEL-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; LOOP-DEL-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; LOOP-DEL-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; LOOP-DEL-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; LOOP-DEL: while.body:
+; LOOP-DEL-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; LOOP-DEL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; LOOP-DEL-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; LOOP-DEL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; LOOP-DEL-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; LOOP-DEL-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; LOOP-DEL-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; LOOP-DEL: while.end:
+; LOOP-DEL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; LOOP-DEL-NEXT: ret i32 [[INC_LCSSA]]
+;
+; MASKED-LABEL: define i32 @no_implicit_float(
+; MASKED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
+; MASKED-NEXT: entry:
+; MASKED-NEXT: br label [[WHILE_COND:%.*]]
+; MASKED: while.cond:
+; MASKED-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; MASKED-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; MASKED-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; MASKED-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; MASKED: while.body:
+; MASKED-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; MASKED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; MASKED-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; MASKED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; MASKED-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; MASKED-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; MASKED-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; MASKED: while.end:
+; MASKED-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; MASKED-NEXT: ret i32 [[INC_LCSSA]]
+;
+; Negative test. The function is marked noimplicitfloat, and all four sets of
+; assertions above (prefixes CHECK, LMUL8, LOOP-DEL and MASKED) expect the
+; same three-block scalar loop as the input below, i.e. no vector code is
+; expected to be generated for it.
+; NOTE(review): the RUN lines that define those prefixes are not visible from
+; this function; confirm what configuration each prefix corresponds to.
+; NOTE(review): the assertions have the shape produced by
+; utils/update_test_checks.py; presumably they should be regenerated rather
+; than edited by hand. Confirm against the header of the test file.
+entry:
+  br label %while.cond
+
+; Pre-increment the i32 index (starting from %len) and leave the loop as soon
+; as it equals %n.
+while.cond:
+  %len.addr = phi i32 [ %len, %entry ], [ %inc, %while.body ]
+  %inc = add i32 %len.addr, 1
+  %cmp.not = icmp eq i32 %inc, %n
+  br i1 %cmp.not, label %while.end, label %while.body
+
+; Load one byte from %a and one from %b at the zero-extended index; keep
+; looping while the two bytes are equal, otherwise exit.
+while.body:
+  %idxprom = zext i32 %inc to i64
+  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom
+  %0 = load i8, ptr %arrayidx
+  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom
+  %1 = load i8, ptr %arrayidx2
+  %cmp.not2 = icmp eq i8 %0, %1
+  br i1 %cmp.not2, label %while.cond, label %while.end
+
+; Both exits carry the same value: the index at which the loop stopped.
+while.end:
+  %inc.lcssa = phi i32 [ %inc, %while.body ], [ %inc, %while.cond ]
+  ret i32 %inc.lcssa
+}