+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
+
+; Check that EVL tail-folded loops from the loop vectorizer are able to have the
+; VL of non-VP instructions reduced.
+define void @evl_tail_folded(ptr %p, ptr %q) {
+; CHECK-LABEL: evl_tail_folded:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: srli a3, a3, 2
+; CHECK-NEXT: addi a4, a3, 1023
+; CHECK-NEXT: neg a5, a3
+; CHECK-NEXT: and a4, a4, a5
+; CHECK-NEXT: li a5, 1024
+; CHECK-NEXT: .LBB0_1: # %vector.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: sub a6, a5, a2
+; CHECK-NEXT: slli a7, a2, 3
+; CHECK-NEXT: vsetvli a6, a6, e64, m2, ta, ma
+; CHECK-NEXT: add t0, a0, a7
+; CHECK-NEXT: vle64.v v8, (t0)
+; CHECK-NEXT: sub a4, a4, a3
+; CHECK-NEXT: add a7, a1, a7
+; CHECK-NEXT: vadd.vi v8, v8, 1
+; CHECK-NEXT: vse64.v v8, (a7)
+; CHECK-NEXT: add a2, a2, a6
+; CHECK-NEXT: bnez a4, .LBB0_1
+; CHECK-NEXT: # %bb.2: # %exit
+; CHECK-NEXT: ret
+entry:
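+  ; Round the trip count of 1024 up to the next multiple of the VF
+  ; (vscale x 2); %n.vec is the element count used for the exit check.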
+  %0 = tail call i64 @llvm.vscale.i64()
+  %1 = shl i64 %0, 1
+  %n.rnd.up = add i64 %1, 1023
+  %n.mod.vf = urem i64 %n.rnd.up, %1
+  %n.vec = sub i64 %n.rnd.up, %n.mod.vf
+  %2 = tail call i64 @llvm.vscale.i64()
+  %3 = shl i64 %2, 1
+  br label %vector.body
+
+vector.body:
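+  ; Two inductions: %index steps by the full VF for the exit check, while
+  ; %evl.based.iv advances by the EVL actually processed each iteration.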
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %evl.based.iv = phi i64 [ 0, %entry ], [ %index.evl.next, %vector.body ]
+  %avl = sub i64 1024, %evl.based.iv
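+  ; The EVL for this iteration: at most the remaining element count and at most
+  ; one <vscale x 2 x i64> worth of elements.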
+  %4 = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 2, i1 true)
+  %5 = getelementptr i64, ptr %p, i64 %evl.based.iv
+  %vp.op.load = tail call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %5, <vscale x 2 x i1> splat (i1 true), i32 %4)
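+  ; A plain (non-VP) add: its VL should be reduced to the EVL used by the
+  ; surrounding VP load and store rather than staying at VLMAX.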
+  %6 = add <vscale x 2 x i64> %vp.op.load, splat (i64 1)
+  %7 = getelementptr i64, ptr %q, i64 %evl.based.iv
+  tail call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> %6, ptr %7, <vscale x 2 x i1> splat (i1 true), i32 %4)
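+  ; Advance the EVL-based IV by the number of elements processed this iteration.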
+  %8 = zext i32 %4 to i64
+  %index.evl.next = add i64 %evl.based.iv, %8
+  %index.next = add i64 %index, %3
+  %9 = icmp eq i64 %index.next, %n.vec
+  br i1 %9, label %exit, label %vector.body
+
+exit:
+  ret void
+}