diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 7c5e5336b6531..88dbc60cc4bd5 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -5290,11 +5290,17 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
     }
   }
 
-  // Sink vscales closer to uses for better isel
+  auto ShouldSinkCondition = [](Value *Cond) -> bool {
+    auto *II = dyn_cast<IntrinsicInst>(Cond);
+    return II && II->getIntrinsicID() == Intrinsic::vector_reduce_or &&
+           isa<ScalableVectorType>(II->getOperand(0)->getType());
+  };
+
   switch (I->getOpcode()) {
   case Instruction::GetElementPtr:
   case Instruction::Add:
   case Instruction::Sub:
+    // Sink vscales closer to uses for better isel
     for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
       if (shouldSinkVScale(I->getOperand(Op), Ops)) {
         Ops.push_back(&I->getOperandUse(Op));
@@ -5302,6 +5308,23 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
       }
     }
     break;
+  case Instruction::Select: {
+    if (!ShouldSinkCondition(I->getOperand(0)))
+      return false;
+
+    Ops.push_back(&I->getOperandUse(0));
+    return true;
+  }
+  case Instruction::Br: {
+    if (cast<BranchInst>(I)->isUnconditional())
+      return false;
+
+    if (!ShouldSinkCondition(cast<BranchInst>(I)->getCondition()))
+      return false;
+
+    Ops.push_back(&I->getOperandUse(0));
+    return true;
+  }
   default:
     break;
   }
diff --git a/llvm/test/CodeGen/AArch64/reduce-or-opt.ll b/llvm/test/CodeGen/AArch64/reduce-or-opt.ll
new file mode 100644
index 0000000000000..f5df5ea53c990
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/reduce-or-opt.ll
@@ -0,0 +1,193 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+sve | FileCheck %s
+
+define i64 @select_or_reduce_v2i1(ptr nocapture noundef readonly %src) {
+; CHECK-LABEL: select_or_reduce_v2i1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: .LBB0_1: // %vector.body
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldr q0, [x0, x8]
+; CHECK-NEXT: cmeq v0.2d, v0.2d, #0
+; CHECK-NEXT: umaxv s0, v0.4s
+; CHECK-NEXT: fmov w9, s0
+; CHECK-NEXT: tbnz w9, #0, .LBB0_3
+; CHECK-NEXT: // %bb.2: // %vector.body
+; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: cmp x8, #16
+; CHECK-NEXT: add x8, x8, #16
+; CHECK-NEXT: b.ne .LBB0_1
+; CHECK-NEXT: .LBB0_3: // %middle.split
+; CHECK-NEXT: and x0, x9, #0x1
+; CHECK-NEXT: ret
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %arrayidx = getelementptr inbounds ptr, ptr %src, i64 %index
+  %wide.load = load <2 x ptr>, ptr %arrayidx, align 8
+  %cond = icmp eq <2 x ptr> %wide.load, splat(ptr zeroinitializer)
+  %index.next = add nuw i64 %index, 2
+  %or.reduc = tail call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %cond)
+  %iv.cmp = icmp eq i64 %index.next, 4
+  %exit.cond = or i1 %or.reduc, %iv.cmp
+  br i1 %exit.cond, label %middle.split, label %vector.body
+
+middle.split:
+  %sel = select i1 %or.reduc, i64 1, i64 0
+  ret i64 %sel
+}
+
+define i64 @br_or_reduce_v2i1(ptr nocapture noundef readonly %src, ptr noundef readnone %p) {
+; CHECK-LABEL: br_or_reduce_v2i1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: .LBB1_1: // %vector.body
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldr q0, [x0, x8]
+; CHECK-NEXT: cmeq v0.2d, v0.2d, #0
+; CHECK-NEXT: umaxv s0, v0.4s
+; CHECK-NEXT: fmov w9, s0
+; CHECK-NEXT: tbnz w9, #0, .LBB1_3
+; CHECK-NEXT: // %bb.2: // %vector.body
+; CHECK-NEXT: // in Loop: Header=BB1_1 Depth=1
+; CHECK-NEXT: cmp x8, #16
+; CHECK-NEXT: add x8, x8, #16
+; CHECK-NEXT: b.ne .LBB1_1
+; CHECK-NEXT: .LBB1_3: // %middle.split
+; CHECK-NEXT: tbz w9, #0, .LBB1_5
+; CHECK-NEXT: // %bb.4: // %found
+; CHECK-NEXT: mov w8, #56 // =0x38
+; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: str x8, [x1]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB1_5:
+; CHECK-NEXT: mov x0, xzr
+; CHECK-NEXT: ret
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %arrayidx = getelementptr inbounds ptr, ptr %src, i64 %index
+  %wide.load = load <2 x ptr>, ptr %arrayidx, align 8
+  %cond = icmp eq <2 x ptr> %wide.load, splat(ptr zeroinitializer)
+  %index.next = add nuw i64 %index, 2
+  %or.reduc = tail call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %cond)
+  %iv.cmp = icmp eq i64 %index.next, 4
+  %exit.cond = or i1 %or.reduc, %iv.cmp
+  br i1 %exit.cond, label %middle.split, label %vector.body
+
+middle.split:
+  br i1 %or.reduc, label %found, label %notfound
+
+found:
+  store i64 56, ptr %p, align 8
+  ret i64 1
+
+notfound:
+  ret i64 0
+}
+
+define i64 @select_or_reduce_nxv2i1(ptr nocapture noundef readonly %src) {
+; CHECK-LABEL: select_or_reduce_nxv2i1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cntd x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov x9, xzr
+; CHECK-NEXT: neg x10, x8
+; CHECK-NEXT: add x10, x10, #4
+; CHECK-NEXT: .LBB2_1: // %vector.body
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x9, lsl #3]
+; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, #0
+; CHECK-NEXT: b.ne .LBB2_3
+; CHECK-NEXT: // %bb.2: // %vector.body
+; CHECK-NEXT: // in Loop: Header=BB2_1 Depth=1
+; CHECK-NEXT: cmp x10, x9
+; CHECK-NEXT: add x9, x9, x8
+; CHECK-NEXT: b.ne .LBB2_1
+; CHECK-NEXT: .LBB2_3: // %middle.split
+; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+entry:
+  %vscale = tail call i64 @llvm.vscale.i64()
+  %vf = shl nuw nsw i64 %vscale, 1
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %arrayidx = getelementptr inbounds ptr, ptr %src, i64 %index
+  %wide.load = load <vscale x 2 x ptr>, ptr %arrayidx, align 8
+  %cond = icmp eq <vscale x 2 x ptr> %wide.load, splat(ptr zeroinitializer)
+  %index.next = add nuw i64 %index, %vf
+  %or.reduc = tail call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> %cond)
+  %iv.cmp = icmp eq i64 %index.next, 4
+  %exit.cond = or i1 %or.reduc, %iv.cmp
+  br i1 %exit.cond, label %middle.split, label %vector.body
+
+middle.split:
+  %sel = select i1 %or.reduc, i64 1, i64 0
+  ret i64 %sel
+}
+
+define i64 @br_or_reduce_nxv2i1(ptr nocapture noundef readonly %src, ptr noundef readnone %p) {
+; CHECK-LABEL: br_or_reduce_nxv2i1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cntd x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov x9, xzr
+; CHECK-NEXT: neg x10, x8
+; CHECK-NEXT: add x10, x10, #4
+; CHECK-NEXT: .LBB3_1: // %vector.body
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x9, lsl #3]
+; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, #0
+; CHECK-NEXT: b.ne .LBB3_3
+; CHECK-NEXT: // %bb.2: // %vector.body
+; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1
+; CHECK-NEXT: cmp x10, x9
+; CHECK-NEXT: add x9, x9, x8
+; CHECK-NEXT: b.ne .LBB3_1
+; CHECK-NEXT: .LBB3_3: // %middle.split
+; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: b.eq .LBB3_5
+; CHECK-NEXT: // %bb.4: // %found
+; CHECK-NEXT: mov w8, #56 // =0x38
+; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: str x8, [x1]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB3_5:
+; CHECK-NEXT: mov x0, xzr
+; CHECK-NEXT: ret
+entry:
+  %vscale = tail call i64 @llvm.vscale.i64()
+  %vf = shl nuw nsw i64 %vscale, 1
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %arrayidx = getelementptr inbounds ptr, ptr %src, i64 %index
+  %wide.load = load <vscale x 2 x ptr>, ptr %arrayidx, align 8
+  %cond = icmp eq <vscale x 2 x ptr> %wide.load, splat(ptr zeroinitializer)
+  %index.next = add nuw i64 %index, %vf
+  %or.reduc = tail call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> %cond)
+  %iv.cmp = icmp eq i64 %index.next, 4
+  %exit.cond = or i1 %or.reduc, %iv.cmp
+  br i1 %exit.cond, label %middle.split, label %vector.body
+
+middle.split:
+  br i1 %or.reduc, label %found, label %notfound
+
+found:
+  store i64 56, ptr %p, align 8
+  ret i64 1
+
+notfound:
+  ret i64 0
+}
+
+declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1>)
+declare i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1>)
diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/reduce-or-opt.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/reduce-or-opt.ll
new file mode 100644
index 0000000000000..52257c10b0bf6
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/reduce-or-opt.ll
@@ -0,0 +1,189 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -codegenprepare -S < %s -mtriple=aarch64-none-linux-gnu -mattr=+sve | FileCheck %s
+
+define i64 @select_or_reduce_v2i1(ptr nocapture noundef readonly %src) {
+; CHECK-LABEL: define i64 @select_or_reduce_v2i1(
+; CHECK-SAME: ptr nocapture noundef readonly [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x ptr>, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[COND:%.*]] = icmp eq <2 x ptr> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[OR_REDUC:%.*]] = tail call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[COND]])
+; CHECK-NEXT: [[IV_CMP:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4
+; CHECK-NEXT: [[EXIT_COND:%.*]] = or i1 [[OR_REDUC]], [[IV_CMP]]
+; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]]
+; CHECK: [[MIDDLE_SPLIT]]:
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[OR_REDUC]], i64 1, i64 0
+; CHECK-NEXT: ret i64 [[SEL]]
+;
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %arrayidx = getelementptr inbounds ptr, ptr %src, i64 %index
+  %wide.load = load <2 x ptr>, ptr %arrayidx, align 8
+  %cond = icmp eq <2 x ptr> %wide.load, splat(ptr zeroinitializer)
+  %index.next = add nuw i64 %index, 2
+  %or.reduc = tail call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %cond)
+  %iv.cmp = icmp eq i64 %index.next, 4
+  %exit.cond = or i1 %or.reduc, %iv.cmp
+  br i1 %exit.cond, label %middle.split, label %vector.body
+
+middle.split:
+  %sel = select i1 %or.reduc, i64 1, i64 0
+  ret i64 %sel
+}
+
+define i64 @br_or_reduce_v2i1(ptr nocapture noundef readonly %src, ptr noundef readnone %p) {
+; CHECK-LABEL: define i64 @br_or_reduce_v2i1(
+; CHECK-SAME: ptr nocapture noundef readonly [[SRC:%.*]], ptr noundef readnone [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x ptr>, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[COND:%.*]] = icmp eq <2 x ptr> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[OR_REDUC:%.*]] = tail call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[COND]])
+; CHECK-NEXT: [[IV_CMP:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4
+; CHECK-NEXT: [[EXIT_COND:%.*]] = or i1 [[OR_REDUC]], [[IV_CMP]]
+; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]]
+; CHECK: [[MIDDLE_SPLIT]]:
+; CHECK-NEXT: br i1 [[OR_REDUC]], label %[[FOUND:.*]], label %[[NOTFOUND:.*]]
+; CHECK: [[FOUND]]:
+; CHECK-NEXT: store i64 56, ptr [[P]], align 8
+; CHECK-NEXT: ret i64 1
+; CHECK: [[NOTFOUND]]:
+; CHECK-NEXT: ret i64 0
+;
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %arrayidx = getelementptr inbounds ptr, ptr %src, i64 %index
+  %wide.load = load <2 x ptr>, ptr %arrayidx, align 8
+  %cond = icmp eq <2 x ptr> %wide.load, splat(ptr zeroinitializer)
+  %index.next = add nuw i64 %index, 2
+  %or.reduc = tail call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %cond)
+  %iv.cmp = icmp eq i64 %index.next, 4
+  %exit.cond = or i1 %or.reduc, %iv.cmp
+  br i1 %exit.cond, label %middle.split, label %vector.body
+
+middle.split:
+  br i1 %or.reduc, label %found, label %notfound
+
+found:
+  store i64 56, ptr %p, align 8
+  ret i64 1
+
+notfound:
+  ret i64 0
+}
+
+define i64 @select_or_reduce_nxv2i1(ptr nocapture noundef readonly %src) {
+; CHECK-LABEL: define i64 @select_or_reduce_nxv2i1(
+; CHECK-SAME: ptr nocapture noundef readonly [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x ptr>, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[COND:%.*]] = icmp eq <vscale x 2 x ptr> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
+; CHECK-NEXT: [[OR_REDUC:%.*]] = tail call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> [[COND]])
+; CHECK-NEXT: [[IV_CMP:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4
+; CHECK-NEXT: [[EXIT_COND:%.*]] = or i1 [[OR_REDUC]], [[IV_CMP]]
+; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]]
+; CHECK: [[MIDDLE_SPLIT]]:
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <vscale x 2 x ptr> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> [[TMP2]])
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP3]], i64 1, i64 0
+; CHECK-NEXT: ret i64 [[SEL]]
+;
+entry:
+  %vscale = tail call i64 @llvm.vscale.i64()
+  %vf = shl nuw nsw i64 %vscale, 1
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %arrayidx = getelementptr inbounds ptr, ptr %src, i64 %index
+  %wide.load = load <vscale x 2 x ptr>, ptr %arrayidx, align 8
+  %cond = icmp eq <vscale x 2 x ptr> %wide.load, splat(ptr zeroinitializer)
+  %index.next = add nuw i64 %index, %vf
+  %or.reduc = tail call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> %cond)
+  %iv.cmp = icmp eq i64 %index.next, 4
+  %exit.cond = or i1 %or.reduc, %iv.cmp
+  br i1 %exit.cond, label %middle.split, label %vector.body
+
+middle.split:
+  %sel = select i1 %or.reduc, i64 1, i64 0
+  ret i64 %sel
+}
+
+define i64 @br_or_reduce_nxv2i1(ptr nocapture noundef readonly %src, ptr noundef readnone %p) {
+; CHECK-LABEL: define i64 @br_or_reduce_nxv2i1(
+; CHECK-SAME: ptr nocapture noundef readonly [[SRC:%.*]], ptr noundef readnone [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x ptr>, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[COND:%.*]] = icmp eq <vscale x 2 x ptr> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
+; CHECK-NEXT: [[OR_REDUC:%.*]] = tail call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> [[COND]])
+; CHECK-NEXT: [[IV_CMP:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4
+; CHECK-NEXT: [[EXIT_COND:%.*]] = or i1 [[OR_REDUC]], [[IV_CMP]]
+; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]]
+; CHECK: [[MIDDLE_SPLIT]]:
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <vscale x 2 x ptr> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> [[TMP2]])
+; CHECK-NEXT: br i1 [[TMP3]], label %[[FOUND:.*]], label %[[NOTFOUND:.*]]
+; CHECK: [[FOUND]]:
+; CHECK-NEXT: store i64 56, ptr [[P]], align 8
+; CHECK-NEXT: ret i64 1
+; CHECK: [[NOTFOUND]]:
+; CHECK-NEXT: ret i64 0
+;
+entry:
+  %vscale = tail call i64 @llvm.vscale.i64()
+  %vf = shl nuw nsw i64 %vscale, 1
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %arrayidx = getelementptr inbounds ptr, ptr %src, i64 %index
+  %wide.load = load <vscale x 2 x ptr>, ptr %arrayidx, align 8
+  %cond = icmp eq <vscale x 2 x ptr> %wide.load, splat(ptr zeroinitializer)
+  %index.next = add nuw i64 %index, %vf
+  %or.reduc = tail call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> %cond)
+  %iv.cmp = icmp eq i64 %index.next, 4
+  %exit.cond = or i1 %or.reduc, %iv.cmp
+  br i1 %exit.cond, label %middle.split, label %vector.body
+
+middle.split:
+  br i1 %or.reduc, label %found, label %notfound
+
+found:
+  store i64 56, ptr %p, align 8
+  ret i64 1
+
+notfound:
+  ret i64 0
+}
+
+declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1>)
+declare i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1>)