From 7896ea129be982bced4a2efae518e24352c719de Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Fri, 14 Feb 2025 16:34:53 +0800 Subject: [PATCH 1/4] Precommit tests --- .../vectorize-force-tail-with-evl-div.ll | 407 ++++++++++++++++++ 1 file changed, 407 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-div.ll diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-div.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-div.ll new file mode 100644 index 0000000000000..75c55cb5fe833 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-div.ll @@ -0,0 +1,407 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s --check-prefix=IF-EVL + +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s --check-prefix=NO-VP + +define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; IF-EVL-LABEL: define void @test_sdiv( +; IF-EVL-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0:[0-9]+]] { +; IF-EVL-NEXT: [[LOOP_PREHEADER:.*]]: +; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; IF-EVL: [[VECTOR_PH]]: +; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] +; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] +; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 +; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] +; IF-EVL: [[VECTOR_BODY]]: +; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0 +; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP6]] +; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B]], i64 [[TMP6]] +; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP9]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP10]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP11:%.*]] = call @llvm.vp.merge.nxv2i64( splat (i1 true), [[VP_OP_LOAD1]], splat (i64 1), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.sdiv.nxv2i64( [[VP_OP_LOAD]], [[TMP11]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP6]] +; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[TMP12]], i32 0 +; IF-EVL-NEXT: call void 
@llvm.vp.store.nxv2i64.p0( [[VP_OP]], ptr align 8 [[TMP13]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] +; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL: [[MIDDLE_BLOCK]]: +; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL: [[SCALAR_PH]]: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; IF-EVL-NEXT: br label %[[LOOP:.*]] +; IF-EVL: [[LOOP]]: +; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; IF-EVL-NEXT: [[A_GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]] +; IF-EVL-NEXT: [[TMP16:%.*]] = load i64, ptr [[A_GEP]], align 8 +; IF-EVL-NEXT: [[B_GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] +; IF-EVL-NEXT: [[TMP17:%.*]] = load i64, ptr [[B_GEP]], align 8 +; IF-EVL-NEXT: [[TMP18:%.*]] = sdiv i64 [[TMP16]], [[TMP17]] +; IF-EVL-NEXT: [[C_GEP:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV]] +; IF-EVL-NEXT: store i64 [[TMP18]], ptr [[C_GEP]], align 8 +; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; IF-EVL-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; IF-EVL-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; IF-EVL: [[EXIT]]: +; IF-EVL-NEXT: ret void +; +; NO-VP-LABEL: define void @test_sdiv( +; NO-VP-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0:[0-9]+]] { +; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]: +; NO-VP-NEXT: br label %[[LOOP:.*]] +; NO-VP: [[LOOP]]: +; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] +; NO-VP-NEXT: [[A_GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]] +; NO-VP-NEXT: [[TMP0:%.*]] = load i64, ptr [[A_GEP]], align 8 +; NO-VP-NEXT: [[B_GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] +; NO-VP-NEXT: [[TMP1:%.*]] = load i64, ptr [[B_GEP]], align 8 +; NO-VP-NEXT: [[TMP2:%.*]] = sdiv i64 [[TMP0]], [[TMP1]] +; NO-VP-NEXT: [[C_GEP:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV]] +; NO-VP-NEXT: store i64 [[TMP2]], ptr [[C_GEP]], align 8 +; NO-VP-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; NO-VP-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; NO-VP-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]] +; NO-VP: [[EXIT]]: +; NO-VP-NEXT: ret void +; +loop.preheader: + br label %loop + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %loop.preheader ] + %a.gep = getelementptr i64, ptr %a, i64 %iv + %0 = load i64, ptr %a.gep + %b.gep = getelementptr i64, ptr %b, i64 %iv + %1 = load i64, ptr %b.gep + %2 = sdiv i64 %0, %1 + %c.gep = getelementptr i64, ptr %c, i64 %iv + store i64 %2, ptr %c.gep + %iv.next = add i64 %iv, 1 + %done = icmp eq i64 %iv.next, 1024 + br i1 %done, label %exit, label %loop + +exit: + ret void +} + + +define void @test_udiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; IF-EVL-LABEL: define void @test_udiv( +; IF-EVL-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; IF-EVL-NEXT: [[LOOP_PREHEADER:.*]]: +; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; IF-EVL: [[VECTOR_PH]]: +; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 
2 +; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] +; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] +; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 +; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] +; IF-EVL: [[VECTOR_BODY]]: +; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0 +; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP6]] +; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B]], i64 [[TMP6]] +; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP9]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP10]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP11:%.*]] = call @llvm.vp.merge.nxv2i64( splat (i1 true), [[VP_OP_LOAD1]], splat (i64 1), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.udiv.nxv2i64( [[VP_OP_LOAD]], [[TMP11]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP6]] +; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[TMP12]], i32 0 +; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP]], ptr align 8 [[TMP13]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] +; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; IF-EVL: [[MIDDLE_BLOCK]]: +; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL: [[SCALAR_PH]]: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; IF-EVL-NEXT: br label %[[LOOP:.*]] +; IF-EVL: [[LOOP]]: +; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; IF-EVL-NEXT: [[A_GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]] +; IF-EVL-NEXT: [[TMP16:%.*]] = load i64, ptr [[A_GEP]], align 8 +; IF-EVL-NEXT: [[B_GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] +; IF-EVL-NEXT: [[TMP17:%.*]] = load i64, ptr [[B_GEP]], align 8 +; IF-EVL-NEXT: [[TMP18:%.*]] = udiv i64 [[TMP16]], [[TMP17]] +; IF-EVL-NEXT: [[C_GEP:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV]] +; IF-EVL-NEXT: store i64 [[TMP18]], ptr [[C_GEP]], align 8 +; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; IF-EVL-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; IF-EVL-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; IF-EVL: [[EXIT]]: +; IF-EVL-NEXT: ret void +; +; NO-VP-LABEL: define void @test_udiv( +; NO-VP-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) 
#[[ATTR0]] { +; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]: +; NO-VP-NEXT: br label %[[LOOP:.*]] +; NO-VP: [[LOOP]]: +; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] +; NO-VP-NEXT: [[A_GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]] +; NO-VP-NEXT: [[TMP0:%.*]] = load i64, ptr [[A_GEP]], align 8 +; NO-VP-NEXT: [[B_GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] +; NO-VP-NEXT: [[TMP1:%.*]] = load i64, ptr [[B_GEP]], align 8 +; NO-VP-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP0]], [[TMP1]] +; NO-VP-NEXT: [[C_GEP:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV]] +; NO-VP-NEXT: store i64 [[TMP2]], ptr [[C_GEP]], align 8 +; NO-VP-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; NO-VP-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; NO-VP-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]] +; NO-VP: [[EXIT]]: +; NO-VP-NEXT: ret void +; +loop.preheader: + br label %loop + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %loop.preheader ] + %a.gep = getelementptr i64, ptr %a, i64 %iv + %0 = load i64, ptr %a.gep + %b.gep = getelementptr i64, ptr %b, i64 %iv + %1 = load i64, ptr %b.gep + %2 = udiv i64 %0, %1 + %c.gep = getelementptr i64, ptr %c, i64 %iv + store i64 %2, ptr %c.gep + %iv.next = add i64 %iv, 1 + %done = icmp eq i64 %iv.next, 1024 + br i1 %done, label %exit, label %loop + +exit: + ret void +} + +define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; IF-EVL-LABEL: define void @test_srem( +; IF-EVL-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; IF-EVL-NEXT: [[LOOP_PREHEADER:.*]]: +; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; IF-EVL: [[VECTOR_PH]]: +; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] +; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] +; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 +; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] +; IF-EVL: [[VECTOR_BODY]]: +; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0 +; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP6]] +; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B]], i64 [[TMP6]] +; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP9]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP10]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP11:%.*]] = call @llvm.vp.merge.nxv2i64( splat (i1 true), [[VP_OP_LOAD1]], splat (i64 1), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.srem.nxv2i64( [[VP_OP_LOAD]], [[TMP11]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP6]] +; 
IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[TMP12]], i32 0 +; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP]], ptr align 8 [[TMP13]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] +; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; IF-EVL: [[MIDDLE_BLOCK]]: +; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL: [[SCALAR_PH]]: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; IF-EVL-NEXT: br label %[[LOOP:.*]] +; IF-EVL: [[LOOP]]: +; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; IF-EVL-NEXT: [[A_GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]] +; IF-EVL-NEXT: [[TMP16:%.*]] = load i64, ptr [[A_GEP]], align 8 +; IF-EVL-NEXT: [[B_GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] +; IF-EVL-NEXT: [[TMP17:%.*]] = load i64, ptr [[B_GEP]], align 8 +; IF-EVL-NEXT: [[TMP18:%.*]] = srem i64 [[TMP16]], [[TMP17]] +; IF-EVL-NEXT: [[C_GEP:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV]] +; IF-EVL-NEXT: store i64 [[TMP18]], ptr [[C_GEP]], align 8 +; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; IF-EVL-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; IF-EVL-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; IF-EVL: [[EXIT]]: +; IF-EVL-NEXT: ret void +; +; NO-VP-LABEL: define void @test_srem( +; NO-VP-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]: +; NO-VP-NEXT: br label %[[LOOP:.*]] +; NO-VP: [[LOOP]]: +; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] +; NO-VP-NEXT: [[A_GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]] +; NO-VP-NEXT: [[TMP0:%.*]] = load i64, ptr [[A_GEP]], align 8 +; NO-VP-NEXT: [[B_GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] +; NO-VP-NEXT: [[TMP1:%.*]] = load i64, ptr [[B_GEP]], align 8 +; NO-VP-NEXT: [[TMP2:%.*]] = srem i64 [[TMP0]], [[TMP1]] +; NO-VP-NEXT: [[C_GEP:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV]] +; NO-VP-NEXT: store i64 [[TMP2]], ptr [[C_GEP]], align 8 +; NO-VP-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; NO-VP-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; NO-VP-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]] +; NO-VP: [[EXIT]]: +; NO-VP-NEXT: ret void +; +loop.preheader: + br label %loop + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %loop.preheader ] + %a.gep = getelementptr i64, ptr %a, i64 %iv + %0 = load i64, ptr %a.gep + %b.gep = getelementptr i64, ptr %b, i64 %iv + %1 = load i64, ptr %b.gep + %2 = srem i64 %0, %1 + %c.gep = getelementptr i64, ptr %c, i64 %iv + store i64 %2, ptr %c.gep + %iv.next = add i64 %iv, 1 + %done = icmp eq i64 %iv.next, 1024 + br i1 %done, label %exit, label %loop + +exit: + ret void +} + +define void @test_urem(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; IF-EVL-LABEL: define void @test_urem( +; IF-EVL-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; IF-EVL-NEXT: [[LOOP_PREHEADER:.*]]: +; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; IF-EVL: [[VECTOR_PH]]: +; IF-EVL-NEXT: 
[[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] +; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] +; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 +; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] +; IF-EVL: [[VECTOR_BODY]]: +; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0 +; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP6]] +; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B]], i64 [[TMP6]] +; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP9]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP10]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP11:%.*]] = call @llvm.vp.merge.nxv2i64( splat (i1 true), [[VP_OP_LOAD1]], splat (i64 1), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.urem.nxv2i64( [[VP_OP_LOAD]], [[TMP11]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP6]] +; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[TMP12]], i32 0 +; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP]], ptr align 8 [[TMP13]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] +; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; IF-EVL: [[MIDDLE_BLOCK]]: +; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL: [[SCALAR_PH]]: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; IF-EVL-NEXT: br label %[[LOOP:.*]] +; IF-EVL: [[LOOP]]: +; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; IF-EVL-NEXT: [[A_GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]] +; IF-EVL-NEXT: [[TMP16:%.*]] = load i64, ptr [[A_GEP]], align 8 +; IF-EVL-NEXT: [[B_GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] +; IF-EVL-NEXT: [[TMP17:%.*]] = load i64, ptr [[B_GEP]], align 8 +; IF-EVL-NEXT: [[TMP18:%.*]] = urem i64 [[TMP16]], [[TMP17]] +; IF-EVL-NEXT: [[C_GEP:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV]] +; IF-EVL-NEXT: store i64 [[TMP18]], ptr [[C_GEP]], align 8 +; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; IF-EVL-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; IF-EVL-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; IF-EVL: [[EXIT]]: +; IF-EVL-NEXT: ret void +; +; NO-VP-LABEL: define void @test_urem( 
+; NO-VP-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]: +; NO-VP-NEXT: br label %[[LOOP:.*]] +; NO-VP: [[LOOP]]: +; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] +; NO-VP-NEXT: [[A_GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]] +; NO-VP-NEXT: [[TMP0:%.*]] = load i64, ptr [[A_GEP]], align 8 +; NO-VP-NEXT: [[B_GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] +; NO-VP-NEXT: [[TMP1:%.*]] = load i64, ptr [[B_GEP]], align 8 +; NO-VP-NEXT: [[TMP2:%.*]] = urem i64 [[TMP0]], [[TMP1]] +; NO-VP-NEXT: [[C_GEP:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV]] +; NO-VP-NEXT: store i64 [[TMP2]], ptr [[C_GEP]], align 8 +; NO-VP-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; NO-VP-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; NO-VP-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]] +; NO-VP: [[EXIT]]: +; NO-VP-NEXT: ret void +; +loop.preheader: + br label %loop + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %loop.preheader ] + %a.gep = getelementptr i64, ptr %a, i64 %iv + %0 = load i64, ptr %a.gep + %b.gep = getelementptr i64, ptr %b, i64 %iv + %1 = load i64, ptr %b.gep + %2 = urem i64 %0, %1 + %c.gep = getelementptr i64, ptr %c, i64 %iv + store i64 %2, ptr %c.gep + %iv.next = add i64 %iv, 1 + %done = icmp eq i64 %iv.next, 1024 + br i1 %done, label %exit, label %loop + +exit: + ret void +} +;. +; IF-EVL: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; IF-EVL: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; IF-EVL: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; IF-EVL: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; IF-EVL: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; IF-EVL: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} +; IF-EVL: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +;. 
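The precommit checks above all pin down the same idiom: sdiv, udiv, srem, and urem are immediate undefined behavior on a zero divisor (and the signed forms also on INT_MIN / -1), so under tail folding the lanes past the effective vector length must not evaluate the original divisor. The vectorizer therefore merges the divisor with splat(1) under the EVL before emitting the division. A minimal sketch of the pattern, with hypothetical value names standing in for the temporaries in the checks above:

; %evl, %a.load, %b.load stand in for [[TMP5]], [[VP_OP_LOAD]], [[VP_OP_LOAD1]].
; With an all-true mask, vp.merge takes the false operand for lanes >= %evl,
; so every inactive lane divides by 1 and cannot trap.
%safe.b = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %b.load, <vscale x 2 x i64> splat (i64 1), i32 %evl)
%div = call <vscale x 2 x i64> @llvm.vp.sdiv.nxv2i64(<vscale x 2 x i64> %a.load, <vscale x 2 x i64> %safe.b, <vscale x 2 x i1> splat (i1 true), i32 %evl)

The NO-VP runs keep the scalar loop, confirming that these loops are only vectorized when EVL-based predicated tail folding is in effect.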
From 5f1738013978a8a2519d81c1b1991d9546359490 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Fri, 7 Feb 2025 12:40:06 +0800 Subject: [PATCH 2/4] [VPlan] Don't convert widen recipes to VP intrinsics in EVL transform --- llvm/lib/Transforms/Vectorize/VPlan.h | 53 +-------------- .../Transforms/Vectorize/VPlanAnalysis.cpp | 5 +- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 48 ------------- .../Transforms/Vectorize/VPlanTransforms.cpp | 30 -------- llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 - .../Transforms/Vectorize/VPlanVerifier.cpp | 4 -- .../LoopVectorize/RISCV/inloop-reduction.ll | 6 +- .../truncate-to-minimal-bitwidth-evl-crash.ll | 8 +-- .../RISCV/type-info-cache-evl-crash.ll | 14 ++-- ...-force-tail-with-evl-bin-unary-ops-args.ll | 36 +++++----- ...ize-force-tail-with-evl-call-intrinsics.ll | 68 ++++++++++++------- ...ize-force-tail-with-evl-cast-intrinsics.ll | 20 +++--- ...rize-force-tail-with-evl-cond-reduction.ll | 4 +- .../vectorize-force-tail-with-evl-div.ll | 8 +-- ...-force-tail-with-evl-intermediate-store.ll | 2 +- ...e-force-tail-with-evl-known-no-overflow.ll | 6 +- ...ze-force-tail-with-evl-masked-loadstore.ll | 2 +- ...vectorize-force-tail-with-evl-reduction.ll | 12 ++-- .../RISCV/vectorize-vp-intrinsics.ll | 2 +- .../RISCV/vplan-vp-call-intrinsics.ll | 44 ++++++------ .../RISCV/vplan-vp-cast-intrinsics.ll | 40 +++++------ .../RISCV/vplan-vp-intrinsics-reduction.ll | 2 +- .../RISCV/vplan-vp-intrinsics.ll | 2 +- .../RISCV/vplan-vp-select-intrinsics.ll | 4 +- 24 files changed, 154 insertions(+), 267 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 8089cfd1ce802..d86914f0fb026 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -523,7 +523,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { case VPRecipeBase::VPWidenGEPSC: case VPRecipeBase::VPWidenIntrinsicSC: case VPRecipeBase::VPWidenSC: - case VPRecipeBase::VPWidenEVLSC: case VPRecipeBase::VPWidenSelectSC: case VPRecipeBase::VPBlendSC: case VPRecipeBase::VPPredInstPHISC: @@ -710,7 +709,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe { static inline bool classof(const VPRecipeBase *R) { return R->getVPDefID() == VPRecipeBase::VPInstructionSC || R->getVPDefID() == VPRecipeBase::VPWidenSC || - R->getVPDefID() == VPRecipeBase::VPWidenEVLSC || R->getVPDefID() == VPRecipeBase::VPWidenGEPSC || R->getVPDefID() == VPRecipeBase::VPWidenCastSC || R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC || @@ -1113,8 +1111,7 @@ class VPWidenRecipe : public VPRecipeWithIRFlags { } static inline bool classof(const VPRecipeBase *R) { - return R->getVPDefID() == VPRecipeBase::VPWidenSC || - R->getVPDefID() == VPRecipeBase::VPWidenEVLSC; + return R->getVPDefID() == VPRecipeBase::VPWidenSC; } static inline bool classof(const VPUser *U) { @@ -1139,54 +1136,6 @@ class VPWidenRecipe : public VPRecipeWithIRFlags { #endif }; -/// A recipe for widening operations with vector-predication intrinsics with -/// explicit vector length (EVL). 
-class VPWidenEVLRecipe : public VPWidenRecipe { - using VPRecipeWithIRFlags::transferFlags; - -public: - template - VPWidenEVLRecipe(Instruction &I, iterator_range Operands, VPValue &EVL) - : VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) { - addOperand(&EVL); - } - VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL) - : VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) { - transferFlags(W); - } - - ~VPWidenEVLRecipe() override = default; - - VPWidenRecipe *clone() override final { - llvm_unreachable("VPWidenEVLRecipe cannot be cloned"); - return nullptr; - } - - VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC); - - VPValue *getEVL() { return getOperand(getNumOperands() - 1); } - const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); } - - /// Produce a vp-intrinsic using the opcode and operands of the recipe, - /// processing EVL elements. - void execute(VPTransformState &State) override final; - - /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { - assert(is_contained(operands(), Op) && - "Op must be an operand of the recipe"); - // EVL in that recipe is always the last operand, thus any use before means - // the VPValue should be vectorized. - return getEVL() == Op; - } - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const override final; -#endif -}; - /// VPWidenCastRecipe is a recipe to create vector cast instructions. class VPWidenCastRecipe : public VPRecipeWithIRFlags { /// Cast instruction opcode. diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index bf61251fc9133..8e5b37a35dda7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -251,9 +251,8 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { VPPartialReductionRecipe>([this](const VPRecipeBase *R) { return inferScalarType(R->getOperand(0)); }) - .Case( + .Case( [this](const auto *R) { return inferScalarTypeForRecipe(R); }) .Case([](const VPWidenIntrinsicRecipe *R) { return R->getResultType(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index d57a6c481748c..8a928fa71fab8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -86,7 +86,6 @@ bool VPRecipeBase::mayWriteToMemory() const { case VPWidenLoadSC: case VPWidenPHISC: case VPWidenSC: - case VPWidenEVLSC: case VPWidenSelectSC: { const Instruction *I = dyn_cast_or_null(getVPSingleValue()->getUnderlyingValue()); @@ -132,7 +131,6 @@ bool VPRecipeBase::mayReadFromMemory() const { case VPWidenIntOrFpInductionSC: case VPWidenPHISC: case VPWidenSC: - case VPWidenEVLSC: case VPWidenSelectSC: { const Instruction *I = dyn_cast_or_null(getVPSingleValue()->getUnderlyingValue()); @@ -173,7 +171,6 @@ bool VPRecipeBase::mayHaveSideEffects() const { case VPWidenPHISC: case VPWidenPointerInductionSC: case VPWidenSC: - case VPWidenEVLSC: case VPWidenSelectSC: { const Instruction *I = dyn_cast_or_null(getVPSingleValue()->getUnderlyingValue()); @@ -1602,42 +1599,6 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF, } } -void VPWidenEVLRecipe::execute(VPTransformState &State) { - unsigned Opcode = getOpcode(); - // TODO: Support other opcodes - if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode)) - 
llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute"); - - State.setDebugLocFrom(getDebugLoc()); - - assert(State.get(getOperand(0))->getType()->isVectorTy() && - "VPWidenEVLRecipe should not be used for scalars"); - - VPValue *EVL = getEVL(); - Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true); - IRBuilderBase &BuilderIR = State.Builder; - VectorBuilder Builder(BuilderIR); - Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue()); - - SmallVector Ops; - for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) { - VPValue *VPOp = getOperand(I); - Ops.push_back(State.get(VPOp)); - } - - Builder.setMask(Mask).setEVL(EVLArg); - Value *VPInst = - Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op"); - // Currently vp-intrinsics only accept FMF flags. - // TODO: Enable other flags when support is added. - if (isa(VPInst)) - setFlags(cast(VPInst)); - - State.set(this, VPInst); - State.addMetadata(VPInst, - dyn_cast_or_null(getUnderlyingValue())); -} - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { @@ -1647,15 +1608,6 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, printFlags(O); printOperands(O, SlotTracker); } - -void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "WIDEN "; - printAsOperand(O, SlotTracker); - O << " = vp." << Instruction::getOpcodeName(getOpcode()); - printFlags(O); - printOperands(O, SlotTracker); -} #endif void VPWidenCastRecipe::execute(VPTransformState &State) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 6c917e4eef655..ce81b2e147df8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1662,40 +1662,10 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask, VPValue *NewMask = GetNewMask(S->getMask()); return new VPWidenStoreEVLRecipe(*S, EVL, NewMask); }) - .Case([&](VPWidenRecipe *W) -> VPRecipeBase * { - unsigned Opcode = W->getOpcode(); - if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode)) - return nullptr; - return new VPWidenEVLRecipe(*W, EVL); - }) .Case([&](VPReductionRecipe *Red) { VPValue *NewMask = GetNewMask(Red->getCondOp()); return new VPReductionEVLRecipe(*Red, EVL, NewMask); }) - .Case( - [&](auto *CR) -> VPRecipeBase * { - Intrinsic::ID VPID; - if (auto *CallR = dyn_cast(CR)) { - VPID = - VPIntrinsic::getForIntrinsic(CallR->getVectorIntrinsicID()); - } else { - auto *CastR = cast(CR); - VPID = VPIntrinsic::getForOpcode(CastR->getOpcode()); - } - - // Not all intrinsics have a corresponding VP intrinsic. 
- if (VPID == Intrinsic::not_intrinsic) - return nullptr; - assert(VPIntrinsic::getMaskParamPos(VPID) && - VPIntrinsic::getVectorLengthParamPos(VPID) && - "Expected VP intrinsic to have mask and EVL"); - - SmallVector Ops(CR->operands()); - Ops.push_back(&AllOneMask); - Ops.push_back(&EVL); - return new VPWidenIntrinsicRecipe( - VPID, Ops, TypeInfo.inferScalarType(CR), CR->getDebugLoc()); - }) .Case([&](VPWidenSelectRecipe *Sel) { SmallVector Ops(Sel->operands()); Ops.push_back(&EVL); diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index aabc4ab571e7a..a058b2a121d59 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -351,7 +351,6 @@ class VPDef { VPWidenStoreEVLSC, VPWidenStoreSC, VPWidenSC, - VPWidenEVLSC, VPWidenSelectSC, VPBlendSC, VPHistogramSC, diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 96156de444f88..a4b309d6dcd9f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -145,10 +145,6 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const { [&](const VPRecipeBase *S) { return VerifyEVLUse(*S, 2); }) .Case( [&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); }) - .Case([&](const VPWidenEVLRecipe *W) { - return VerifyEVLUse(*W, - Instruction::isUnaryOp(W->getOpcode()) ? 1 : 2); - }) .Case( [&](const VPScalarCastRecipe *S) { return VerifyEVLUse(*S, 0); }) .Case([&](const VPInstruction *I) { diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll index 14818199072c2..b96a44a546a14 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll @@ -143,8 +143,8 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP6]] ; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0 ; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i16.p0(ptr align 2 [[TMP8]], splat (i1 true), i32 [[TMP5]]) -; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = call @llvm.vp.sext.nxv4i32.nxv4i16( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP5]]) -; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv4i32( [[VEC_PHI]], [[TMP9]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = sext [[VP_OP_LOAD]] to +; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = add [[VEC_PHI]], [[TMP9]] ; IF-EVL-OUTLOOP-NEXT: [[TMP10]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[VP_OP]], [[VEC_PHI]], i32 [[TMP5]]) ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP5]], [[EVL_BASED_IV]] ; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]] @@ -200,7 +200,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP7]] ; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0 ; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv8i16.p0(ptr align 2 [[TMP9]], splat (i1 true), i32 [[TMP6]]) -; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = call @llvm.vp.sext.nxv8i32.nxv8i16( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP6]]) +; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = sext [[VP_OP_LOAD]] to ; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = call i32 
@llvm.vp.reduce.add.nxv8i32(i32 0, [[TMP14]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]] ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP6]], [[EVL_BASED_IV]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll index 68b36f23de4b0..ba7158eb02d90 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll @@ -27,10 +27,10 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) { ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv1i8.p0(ptr align 1 [[TMP6]], splat (i1 true), i32 [[TMP3]]) -; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.vp.zext.nxv1i16.nxv1i8( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP3]]) -; CHECK-NEXT: [[VP_OP:%.*]] = call @llvm.vp.mul.nxv1i16( zeroinitializer, [[TMP7]], splat (i1 true), i32 [[TMP3]]) -; CHECK-NEXT: [[VP_OP1:%.*]] = call @llvm.vp.lshr.nxv1i16( [[VP_OP]], trunc ( splat (i32 1) to ), splat (i1 true), i32 [[TMP3]]) -; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.vp.trunc.nxv1i8.nxv1i16( [[VP_OP1]], splat (i1 true), i32 [[TMP3]]) +; CHECK-NEXT: [[TMP7:%.*]] = zext [[VP_OP_LOAD]] to +; CHECK-NEXT: [[TMP12:%.*]] = mul zeroinitializer, [[TMP7]] +; CHECK-NEXT: [[VP_OP1:%.*]] = lshr [[TMP12]], trunc ( splat (i32 1) to ) +; CHECK-NEXT: [[TMP8:%.*]] = trunc [[VP_OP1]] to ; CHECK-NEXT: call void @llvm.vp.scatter.nxv1i8.nxv1p0( [[TMP8]], align 1 zeroinitializer, splat (i1 true), i32 [[TMP3]]) ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP3]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll index 7de51bc3a8a68..c95414db18bef 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll @@ -44,15 +44,15 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0 ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP14]], splat (i1 true), i32 [[TMP11]]), !alias.scope [[META0:![0-9]+]] -; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.vp.zext.nxv8i32.nxv8i8( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP11]]) -; CHECK-NEXT: [[VP_OP:%.*]] = call @llvm.vp.mul.nxv8i32( [[TMP15]], zeroinitializer, splat (i1 true), i32 [[TMP11]]) -; CHECK-NEXT: [[VP_OP2:%.*]] = call @llvm.vp.ashr.nxv8i32( [[TMP15]], zeroinitializer, splat (i1 true), i32 [[TMP11]]) -; CHECK-NEXT: [[VP_OP3:%.*]] = call @llvm.vp.or.nxv8i32( [[VP_OP2]], zeroinitializer, splat (i1 true), i32 [[TMP11]]) +; CHECK-NEXT: [[TMP15:%.*]] = zext [[VP_OP_LOAD]] to +; CHECK-NEXT: [[VP_OP:%.*]] = mul [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = ashr [[TMP15]], zeroinitializer +; CHECK-NEXT: [[VP_OP3:%.*]] = or [[TMP23]], zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = icmp ult [[TMP15]], zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = call @llvm.vp.select.nxv8i32( [[TMP16]], [[VP_OP3]], zeroinitializer, 
i32 [[TMP11]]) -; CHECK-NEXT: [[TMP18:%.*]] = call @llvm.vp.trunc.nxv8i8.nxv8i32( [[TMP17]], splat (i1 true), i32 [[TMP11]]) -; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i8.nxv8p0( [[TMP18]], align 1 [[BROADCAST_SPLAT]], splat (i1 true), i32 [[TMP11]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]] -; CHECK-NEXT: [[TMP19:%.*]] = call @llvm.vp.trunc.nxv8i16.nxv8i32( [[VP_OP]], splat (i1 true), i32 [[TMP11]]) +; CHECK-NEXT: [[TMP24:%.*]] = trunc [[TMP17]] to +; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i8.nxv8p0( [[TMP24]], align 1 [[BROADCAST_SPLAT]], splat (i1 true), i32 [[TMP11]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]] +; CHECK-NEXT: [[TMP19:%.*]] = trunc [[VP_OP]] to ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( [[TMP19]], align 2 zeroinitializer, splat (i1 true), i32 [[TMP11]]) ; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll index 13286a3394126..e7181f7f30c77 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll @@ -42,7 +42,7 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], splat (i1 true), i32 [[TMP11]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.and.nxv16i8( [[VP_OP_LOAD]], splat (i8 1), splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = and [[VP_OP_LOAD]], splat (i8 1) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) @@ -136,7 +136,7 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], splat (i1 true), i32 [[TMP11]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.or.nxv16i8( [[VP_OP_LOAD]], splat (i8 1), splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = or [[VP_OP_LOAD]], splat (i8 1) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) @@ -230,7 +230,7 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], splat (i1 true), i32 [[TMP11]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.xor.nxv16i8( [[VP_OP_LOAD]], splat (i8 1), splat (i1 true), i32 [[TMP11]]) 
+; IF-EVL-NEXT: [[VP_OP:%.*]] = xor [[VP_OP_LOAD]], splat (i8 1) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) @@ -324,7 +324,7 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], splat (i1 true), i32 [[TMP11]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.shl.nxv16i8( [[VP_OP_LOAD]], splat (i8 1), splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = shl [[VP_OP_LOAD]], splat (i8 1) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) @@ -418,7 +418,7 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], splat (i1 true), i32 [[TMP11]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.lshr.nxv16i8( [[VP_OP_LOAD]], splat (i8 1), splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = lshr [[VP_OP_LOAD]], splat (i8 1) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) @@ -512,7 +512,7 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], splat (i1 true), i32 [[TMP11]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.ashr.nxv16i8( [[VP_OP_LOAD]], splat (i8 1), splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = ashr [[VP_OP_LOAD]], splat (i8 1) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) @@ -606,7 +606,7 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], splat (i1 true), i32 [[TMP11]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv16i8( [[VP_OP_LOAD]], splat (i8 1), splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = add [[VP_OP_LOAD]], splat (i8 1) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr 
inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) @@ -700,7 +700,7 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], splat (i1 true), i32 [[TMP11]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.sub.nxv16i8( [[VP_OP_LOAD]], splat (i8 1), splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = sub [[VP_OP_LOAD]], splat (i8 1) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) @@ -794,7 +794,7 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], splat (i1 true), i32 [[TMP11]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.mul.nxv16i8( [[VP_OP_LOAD]], splat (i8 3), splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = mul [[VP_OP_LOAD]], splat (i8 3) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) @@ -888,7 +888,7 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], splat (i1 true), i32 [[TMP11]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.sdiv.nxv16i8( [[VP_OP_LOAD]], splat (i8 3), splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = sdiv [[VP_OP_LOAD]], splat (i8 3) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) @@ -982,7 +982,7 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], splat (i1 true), i32 [[TMP11]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.udiv.nxv16i8( [[VP_OP_LOAD]], splat (i8 3), splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = udiv [[VP_OP_LOAD]], splat (i8 3) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) @@ -1076,7 +1076,7 @@ define void 
@test_srem(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], splat (i1 true), i32 [[TMP11]])
-; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.srem.nxv16i8( [[VP_OP_LOAD]], splat (i8 3), splat (i1 true), i32 [[TMP11]])
+; IF-EVL-NEXT: [[VP_OP:%.*]] = srem [[VP_OP_LOAD]], splat (i8 3)
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]])
@@ -1170,7 +1170,7 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], splat (i1 true), i32 [[TMP11]])
-; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.urem.nxv16i8( [[VP_OP_LOAD]], splat (i8 3), splat (i1 true), i32 [[TMP11]])
+; IF-EVL-NEXT: [[VP_OP:%.*]] = urem [[VP_OP_LOAD]], splat (i8 3)
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]])
@@ -1267,7 +1267,7 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
 ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP12]])
-; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast @llvm.vp.fadd.nxv4f32( [[VP_OP_LOAD]], splat (float 3.000000e+00), splat (i1 true), i32 [[TMP12]])
+; IF-EVL-NEXT: [[VP_OP:%.*]] = fadd fast [[VP_OP_LOAD]], splat (float 3.000000e+00)
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
 ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_OP]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP12]])
@@ -1362,7 +1362,7 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
 ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP12]])
-; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast @llvm.vp.fsub.nxv4f32( [[VP_OP_LOAD]], splat (float 3.000000e+00), splat (i1 true), i32 [[TMP12]])
+; IF-EVL-NEXT: [[VP_OP:%.*]] = fsub fast [[VP_OP_LOAD]], splat (float 3.000000e+00)
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
 ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_OP]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP12]])
@@ -1457,7 +1457,7 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
 ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP12]])
-; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast @llvm.vp.fmul.nxv4f32( [[VP_OP_LOAD]], splat (float 3.000000e+00), splat (i1 true), i32 [[TMP12]])
+; IF-EVL-NEXT: [[VP_OP:%.*]] = fmul fast [[VP_OP_LOAD]], splat (float 3.000000e+00)
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
 ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_OP]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP12]])
@@ -1552,7 +1552,7 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
 ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP12]])
-; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast @llvm.vp.fdiv.nxv4f32( [[VP_OP_LOAD]], splat (float 3.000000e+00), splat (i1 true), i32 [[TMP12]])
+; IF-EVL-NEXT: [[VP_OP:%.*]] = fdiv fast [[VP_OP_LOAD]], splat (float 3.000000e+00)
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
 ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_OP]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP12]])
@@ -1700,7 +1700,7 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
 ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP12]])
-; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast @llvm.vp.fneg.nxv4f32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP12]])
+; IF-EVL-NEXT: [[VP_OP:%.*]] = fneg fast [[VP_OP_LOAD]]
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
 ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_OP]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP12]])
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll
index 28ba5efb53c2a..f19e581d1c028 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll
@@ -55,7 +55,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP10]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
 ; IF-EVL-NEXT: [[VP_OP_LOAD5:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP29:%.*]] = call @llvm.vp.smax.nxv4i32( [[VP_OP_LOAD]], [[VP_OP_LOAD5]], splat (i1 true), i32 [[TMP9]])
+; IF-EVL-NEXT: [[TMP29:%.*]] = call @llvm.smax.nxv4i32( [[VP_OP_LOAD]], [[VP_OP_LOAD5]])
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP29]], ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP9]])
@@ -63,7 +63,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
 ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]]
+; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; IF-EVL: [[MIDDLE_BLOCK]]:
 ; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
 ; IF-EVL: [[SCALAR_PH]]:
@@ -80,7 +80,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: store i32 [[DOT]], ptr [[GEP11]], align 4
 ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
+; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
 ; IF-EVL: [[EXIT]]:
 ; IF-EVL-NEXT: ret void
 ;
@@ -170,7 +170,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP10]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
 ; IF-EVL-NEXT: [[VP_OP_LOAD5:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP29:%.*]] = call @llvm.vp.smin.nxv4i32( [[VP_OP_LOAD]], [[VP_OP_LOAD5]], splat (i1 true), i32 [[TMP9]])
+; IF-EVL-NEXT: [[TMP29:%.*]] = call @llvm.smin.nxv4i32( [[VP_OP_LOAD]], [[VP_OP_LOAD5]])
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP29]], ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP9]])
@@ -195,7 +195,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: store i32 [[DOT]], ptr [[GEP11]], align 4
 ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
+; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
 ; IF-EVL: [[EXIT]]:
 ; IF-EVL-NEXT: ret void
 ;
@@ -285,7 +285,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP10]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
 ; IF-EVL-NEXT: [[VP_OP_LOAD5:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP29:%.*]] = call @llvm.vp.umax.nxv4i32( [[VP_OP_LOAD]], [[VP_OP_LOAD5]], splat (i1 true), i32 [[TMP9]])
+; IF-EVL-NEXT: [[TMP29:%.*]] = call @llvm.umax.nxv4i32( [[VP_OP_LOAD]], [[VP_OP_LOAD5]])
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP29]], ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP9]])
@@ -310,7 +310,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: store i32 [[DOT]], ptr [[GEP11]], align 4
 ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
+; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
 ; IF-EVL: [[EXIT]]:
 ; IF-EVL-NEXT: ret void
 ;
@@ -400,7 +400,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP10]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
 ; IF-EVL-NEXT: [[VP_OP_LOAD5:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP29:%.*]] = call @llvm.vp.umin.nxv4i32( [[VP_OP_LOAD]], [[VP_OP_LOAD5]], splat (i1 true), i32 [[TMP9]])
+; IF-EVL-NEXT: [[TMP29:%.*]] = call @llvm.umin.nxv4i32( [[VP_OP_LOAD]], [[VP_OP_LOAD5]])
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP29]], ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP9]])
@@ -425,7 +425,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: store i32 [[DOT]], ptr [[GEP11]], align 4
 ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
+; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
 ; IF-EVL: [[EXIT]]:
 ; IF-EVL-NEXT: ret void
 ;
@@ -507,7 +507,7 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP10]]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP24:%.*]] = call @llvm.vp.ctlz.nxv4i32( [[VP_OP_LOAD]], i1 true, splat (i1 true), i32 [[TMP9]])
+; IF-EVL-NEXT: [[TMP24:%.*]] = call @llvm.ctlz.nxv4i32( [[VP_OP_LOAD]], i1 true)
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
 ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP24]], ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP9]])
@@ -530,7 +530,7 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: store i32 [[TMP19]], ptr [[GEP3]], align 4
 ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
+; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
 ; IF-EVL: [[EXIT]]:
 ; IF-EVL-NEXT: ret void
 ;
@@ -607,7 +607,7 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP14]]
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP16]], splat (i1 true), i32 [[TMP13]])
-; IF-EVL-NEXT: [[TMP17:%.*]] = call @llvm.vp.cttz.nxv4i32( [[VP_OP_LOAD]], i1 true, splat (i1 true), i32 [[TMP13]])
+; IF-EVL-NEXT: [[TMP17:%.*]] = call @llvm.cttz.nxv4i32( [[VP_OP_LOAD]], i1 true)
 ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
 ; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP17]], ptr align 4 [[TMP19]], splat (i1 true), i32 [[TMP13]])
@@ -630,7 +630,7 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: store i32 [[TMP23]], ptr [[GEP3]], align 4
 ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
+; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
 ; IF-EVL: [[EXIT]]:
 ; IF-EVL-NEXT: ret void
 ;
@@ -708,9 +708,9 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP10]]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP13:%.*]] = call @llvm.vp.fpext.nxv4f64.nxv4f32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP14:%.*]] = call @llvm.vp.lrint.nxv4i64.nxv4f64( [[TMP13]], splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP15:%.*]] = call @llvm.vp.trunc.nxv4i32.nxv4i64( [[TMP14]], splat (i1 true), i32 [[TMP9]])
+; IF-EVL-NEXT: [[TMP27:%.*]] = fpext [[VP_OP_LOAD]] to
+; IF-EVL-NEXT: [[TMP28:%.*]] = call @llvm.lrint.nxv4i64.nxv4f64( [[TMP27]])
+; IF-EVL-NEXT: [[TMP15:%.*]] = trunc [[TMP28]] to
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP15]], ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP9]])
@@ -735,7 +735,7 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: store i32 [[CONV3]], ptr [[GEP5]], align 4
 ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
+; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
 ; IF-EVL: [[EXIT]]:
 ; IF-EVL-NEXT: ret void
 ;
@@ -817,9 +817,9 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP10]]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP13:%.*]] = call @llvm.vp.fpext.nxv4f64.nxv4f32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP14:%.*]] = call @llvm.vp.llrint.nxv4i64.nxv4f64( [[TMP13]], splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP15:%.*]] = call @llvm.vp.trunc.nxv4i32.nxv4i64( [[TMP14]], splat (i1 true), i32 [[TMP9]])
+; IF-EVL-NEXT: [[TMP27:%.*]] = fpext [[VP_OP_LOAD]] to
+; IF-EVL-NEXT: [[TMP28:%.*]] = call @llvm.llrint.nxv4i64.nxv4f64( [[TMP27]])
+; IF-EVL-NEXT: [[TMP15:%.*]] = trunc [[TMP28]] to
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP15]], ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP9]])
@@ -844,7 +844,7 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: store i32 [[CONV3]], ptr [[GEP5]], align 4
 ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
+; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
 ; IF-EVL: [[EXIT]]:
 ; IF-EVL-NEXT: ret void
 ;
@@ -926,7 +926,7 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP10]]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP24:%.*]] = call @llvm.vp.abs.nxv4i32( [[VP_OP_LOAD]], i1 true, splat (i1 true), i32 [[TMP9]])
+; IF-EVL-NEXT: [[TMP24:%.*]] = call @llvm.abs.nxv4i32( [[VP_OP_LOAD]], i1 true)
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
 ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP24]], ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP9]])
@@ -949,7 +949,7 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: store i32 [[COND]], ptr [[GEP9]], align 4
 ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
+; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
 ; IF-EVL: [[EXIT]]:
 ; IF-EVL-NEXT: ret void
 ;
@@ -1054,3 +1054,25 @@ declare i32 @llvm.cttz.i32(i32, i1 immarg)
 declare i64 @llvm.lrint.i64.f64(double)
 declare i64 @llvm.llrint.i64.f64(double)
 declare i32 @llvm.abs.i32(i32, i1 immarg)
+;.
+; IF-EVL: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; IF-EVL: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; IF-EVL: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; IF-EVL: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+; IF-EVL: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; IF-EVL: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+; IF-EVL: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
+; IF-EVL: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
+; IF-EVL: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
+; IF-EVL: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]]}
+; IF-EVL: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
+; IF-EVL: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]]}
+; IF-EVL: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
+; IF-EVL: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]}
+; IF-EVL: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
+; IF-EVL: [[LOOP15]] = distinct !{[[LOOP15]], [[META1]]}
+; IF-EVL: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
+; IF-EVL: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]]}
+; IF-EVL: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]}
+; IF-EVL: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]]}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cast-intrinsics.ll index 4557e95f1e1b6..7450c303c1045 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cast-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cast-intrinsics.ll @@ -47,7 +47,7 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i32.p0(ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP12]]), !alias.scope [[META0:![0-9]+]] -; IF-EVL-NEXT: [[TMP16:%.*]] = call @llvm.vp.sext.nxv2i64.nxv2i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP12]]) +; IF-EVL-NEXT: [[TMP16:%.*]] = sext [[VP_OP_LOAD]] to ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[TMP16]], ptr align 8 [[TMP18]], splat (i1 true), i32 [[TMP12]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]] @@ -147,7 +147,7 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i32.p0(ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP12]]), !alias.scope [[META9:![0-9]+]] -; IF-EVL-NEXT: [[TMP16:%.*]] = call @llvm.vp.zext.nxv2i64.nxv2i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP12]]) +; IF-EVL-NEXT: [[TMP16:%.*]] = zext [[VP_OP_LOAD]] to ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[TMP16]], ptr align 8 [[TMP18]], splat (i1 true), i32 [[TMP12]]), !alias.scope [[META12:![0-9]+]], !noalias [[META9]] @@ -247,7 +247,7 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP15]], splat (i1 true), i32 [[TMP12]]), !alias.scope [[META16:![0-9]+]] -; IF-EVL-NEXT: [[TMP16:%.*]] = call @llvm.vp.trunc.nxv2i32.nxv2i64( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP12]]) +; IF-EVL-NEXT: [[TMP16:%.*]] = trunc [[VP_OP_LOAD]] to ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i32.p0( [[TMP16]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP12]]), !alias.scope [[META19:![0-9]+]], !noalias [[META16]] @@ -347,7 +347,7 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2f32.p0(ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP12]]), !alias.scope [[META23:![0-9]+]] -; IF-EVL-NEXT: [[TMP16:%.*]] = call 
@llvm.vp.fpext.nxv2f64.nxv2f32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP12]]) +; IF-EVL-NEXT: [[TMP16:%.*]] = fpext [[VP_OP_LOAD]] to ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[TMP17]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2f64.p0( [[TMP16]], ptr align 8 [[TMP18]], splat (i1 true), i32 [[TMP12]]), !alias.scope [[META26:![0-9]+]], !noalias [[META23]] @@ -447,7 +447,7 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2f64.p0(ptr align 8 [[TMP15]], splat (i1 true), i32 [[TMP12]]), !alias.scope [[META30:![0-9]+]] -; IF-EVL-NEXT: [[TMP16:%.*]] = call @llvm.vp.fptrunc.nxv2f32.nxv2f64( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP12]]) +; IF-EVL-NEXT: [[TMP16:%.*]] = fptrunc [[VP_OP_LOAD]] to ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2f32.p0( [[TMP16]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP12]]), !alias.scope [[META33:![0-9]+]], !noalias [[META30]] @@ -547,7 +547,7 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP15]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP14]]) -; IF-EVL-NEXT: [[TMP18:%.*]] = call @llvm.vp.sitofp.nxv4f32.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP14]]) +; IF-EVL-NEXT: [[TMP18:%.*]] = sitofp [[VP_OP_LOAD]] to ; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]] ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[TMP18]], ptr align 4 [[TMP20]], splat (i1 true), i32 [[TMP14]]) @@ -647,7 +647,7 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP15]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP14]]) -; IF-EVL-NEXT: [[TMP18:%.*]] = call @llvm.vp.uitofp.nxv4f32.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP14]]) +; IF-EVL-NEXT: [[TMP18:%.*]] = uitofp [[VP_OP_LOAD]] to ; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]] ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[TMP18]], ptr align 4 [[TMP20]], splat (i1 true), i32 [[TMP14]]) @@ -747,7 +747,7 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP15]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP14]]) -; IF-EVL-NEXT: [[TMP18:%.*]] = call @llvm.vp.fptosi.nxv4i32.nxv4f32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP14]]) +; IF-EVL-NEXT: [[TMP18:%.*]] = fptosi [[VP_OP_LOAD]] to ; 
IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]] ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP18]], ptr align 4 [[TMP20]], splat (i1 true), i32 [[TMP14]]) @@ -847,7 +847,7 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP15]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP14]]) -; IF-EVL-NEXT: [[TMP18:%.*]] = call @llvm.vp.fptoui.nxv4i32.nxv4f32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP14]]) +; IF-EVL-NEXT: [[TMP18:%.*]] = fptoui [[VP_OP_LOAD]] to ; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]] ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP18]], ptr align 4 [[TMP20]], splat (i1 true), i32 [[TMP14]]) @@ -947,7 +947,7 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP15]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP17]], splat (i1 true), i32 [[TMP14]]) -; IF-EVL-NEXT: [[TMP18:%.*]] = call @llvm.vp.inttoptr.nxv2p0.nxv2i64( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP14]]) +; IF-EVL-NEXT: [[TMP18:%.*]] = inttoptr [[VP_OP_LOAD]] to ; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[TMP15]] ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds ptr, ptr [[TMP19]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2p0.p0( [[TMP18]], ptr align 8 [[TMP20]], splat (i1 true), i32 [[TMP14]]) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll index 78059955b2da5..fefbdb473e2ab 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll @@ -53,7 +53,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp sgt [[VP_OP_LOAD]], splat (i32 3) ; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = call @llvm.vp.select.nxv4i32( [[TMP18]], [[VP_OP_LOAD]], zeroinitializer, i32 [[TMP11]]) -; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv4i32( [[TMP19]], [[VEC_PHI]], splat (i1 true), i32 [[TMP11]]) +; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = add [[TMP19]], [[VEC_PHI]] ; IF-EVL-OUTLOOP-NEXT: [[TMP20]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[VP_OP]], [[VEC_PHI]], i32 [[TMP11]]) ; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP22]], [[EVL_BASED_IV1]] @@ -302,7 +302,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 ; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp sgt [[VP_OP_LOAD]], 
splat (i32 3) -; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = call @llvm.vp.add.nxv4i32( [[VEC_PHI]], [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP11]]) +; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = add [[VEC_PHI]], [[VP_OP_LOAD]] ; IF-EVL-OUTLOOP-NEXT: [[TMP20:%.*]] = xor [[TMP18]], splat (i1 true) ; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = select [[TMP15]], [[TMP20]], zeroinitializer ; IF-EVL-OUTLOOP-NEXT: [[PREDPHI1:%.*]] = select [[TMP21]], [[VEC_PHI]], [[TMP19]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-div.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-div.ll index 75c55cb5fe833..0bfab2da51fa7 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-div.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-div.ll @@ -37,7 +37,7 @@ define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP9]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP10]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP11:%.*]] = call @llvm.vp.merge.nxv2i64( splat (i1 true), [[VP_OP_LOAD1]], splat (i64 1), i32 [[TMP5]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.sdiv.nxv2i64( [[VP_OP_LOAD]], [[TMP11]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = sdiv [[VP_OP_LOAD]], [[TMP11]] ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP6]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[TMP12]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP]], ptr align 8 [[TMP13]], splat (i1 true), i32 [[TMP5]]) @@ -134,7 +134,7 @@ define void @test_udiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP9]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP10]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP11:%.*]] = call @llvm.vp.merge.nxv2i64( splat (i1 true), [[VP_OP_LOAD1]], splat (i64 1), i32 [[TMP5]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.udiv.nxv2i64( [[VP_OP_LOAD]], [[TMP11]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = udiv [[VP_OP_LOAD]], [[TMP11]] ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP6]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[TMP12]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP]], ptr align 8 [[TMP13]], splat (i1 true), i32 [[TMP5]]) @@ -230,7 +230,7 @@ define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP9]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP10]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP11:%.*]] = call @llvm.vp.merge.nxv2i64( splat (i1 true), [[VP_OP_LOAD1]], splat (i64 1), i32 [[TMP5]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.srem.nxv2i64( [[VP_OP_LOAD]], [[TMP11]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = srem [[VP_OP_LOAD]], [[TMP11]] ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP6]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[TMP12]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP]], ptr align 8 [[TMP13]], splat (i1 true), i32 [[TMP5]]) @@ -326,7 +326,7 @@ define void @test_urem(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP9]], i32 0 ; 
IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP10]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP11:%.*]] = call @llvm.vp.merge.nxv2i64( splat (i1 true), [[VP_OP_LOAD1]], splat (i64 1), i32 [[TMP5]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.urem.nxv2i64( [[VP_OP_LOAD]], [[TMP11]], splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = urem [[VP_OP_LOAD]], [[TMP11]] ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP6]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[TMP12]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP]], ptr align 8 [[TMP13]], splat (i1 true), i32 [[TMP5]]) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll index 733c05fd9259c..69988e03b2657 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll @@ -59,7 +59,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]] ; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 ; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP12]]), !alias.scope [[META0:![0-9]+]] -; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv4i32( [[VP_OP_LOAD]], [[VEC_PHI]], splat (i1 true), i32 [[TMP12]]) +; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = add [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-OUTLOOP-NEXT: [[TMP19]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[VP_OP]], [[VEC_PHI]], i32 [[TMP12]]) ; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll index dc74f44688fdf..b4ef2c800f5bd 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll @@ -39,7 +39,7 @@ define void @trip_count_max_1024(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP11]], i32 0 ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP12]], splat (i1 true), i32 [[TMP9]]) -; CHECK-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv2i64( [[VP_OP_LOAD]], splat (i64 1), splat (i1 true), i32 [[TMP9]]) +; CHECK-NEXT: [[VP_OP:%.*]] = add [[VP_OP_LOAD]], splat (i64 1) ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP]], ptr align 8 [[TMP12]], splat (i1 true), i32 [[TMP9]]) ; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]] @@ -115,7 +115,7 @@ define void @overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP11]], i32 0 ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr 
align 8 [[TMP12]], splat (i1 true), i32 [[TMP9]]) -; CHECK-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv2i64( [[VP_OP_LOAD]], splat (i64 1), splat (i1 true), i32 [[TMP9]]) +; CHECK-NEXT: [[VP_OP:%.*]] = add [[VP_OP_LOAD]], splat (i64 1) ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP]], ptr align 8 [[TMP12]], splat (i1 true), i32 [[TMP9]]) ; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]] @@ -187,7 +187,7 @@ define void @no_overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0 ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP5]]) -; CHECK-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv2i64( [[VP_OP_LOAD]], splat (i64 1), splat (i1 true), i32 [[TMP5]]) +; CHECK-NEXT: [[VP_OP:%.*]] = add [[VP_OP_LOAD]], splat (i64 1) ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP]], ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP5]]) ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP5]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll index ab58bb07702c1..4452f3860274d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll @@ -50,7 +50,7 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) { ; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD3:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], [[TMP18]], i32 [[TMP10]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv4i32( [[VP_OP_LOAD]], [[VP_OP_LOAD3]], splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = add [[VP_OP_LOAD]], [[VP_OP_LOAD3]] ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_OP]], ptr align 4 [[TMP20]], [[TMP18]], i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll index b81805d473002..ed44fe33999ad 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll @@ -38,7 +38,7 @@ define i32 @add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], splat (i1 true), i32 [[TMP10]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv4i32( [[VP_OP_LOAD]], [[VEC_PHI]], splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = add [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[VP_OP]], [[VEC_PHI]], i32 [[TMP10]]) ; IF-EVL-NEXT: 
[[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] @@ -271,7 +271,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], splat (i1 true), i32 [[TMP10]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.or.nxv4i32( [[VP_OP_LOAD]], [[VEC_PHI]], splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = or [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[VP_OP]], [[VEC_PHI]], i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] @@ -391,7 +391,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], splat (i1 true), i32 [[TMP10]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.and.nxv4i32( [[VP_OP_LOAD]], [[VEC_PHI]], splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = and [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[VP_OP]], [[VEC_PHI]], i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] @@ -511,7 +511,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], splat (i1 true), i32 [[TMP10]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.xor.nxv4i32( [[VP_OP_LOAD]], [[VEC_PHI]], splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = xor [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[VP_OP]], [[VEC_PHI]], i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] @@ -1139,7 +1139,7 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], splat (i1 true), i32 [[TMP10]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call reassoc @llvm.vp.fadd.nxv4f32( [[VP_OP_LOAD]], [[VEC_PHI]], splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = fadd reassoc [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14]] = call @llvm.vp.merge.nxv4f32( splat (i1 true), [[VP_OP]], [[VEC_PHI]], i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] @@ -1857,7 +1857,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = 
call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP16:%.*]] = call @llvm.vp.fmuladd.nxv4f32( [[VP_OP_LOAD]], [[VP_OP_LOAD1]], [[VEC_PHI]], splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP16:%.*]] = call reassoc @llvm.fmuladd.nxv4f32( [[VP_OP_LOAD]], [[VP_OP_LOAD1]], [[VEC_PHI]]) ; IF-EVL-NEXT: [[TMP17]] = call @llvm.vp.merge.nxv4f32( splat (i1 true), [[TMP16]], [[VEC_PHI]], i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll index 55a84733191fd..9069c6bf7a1a0 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll @@ -39,7 +39,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP12]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv4i32( [[VP_OP_LOAD1]], [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = add nsw [[VP_OP_LOAD1]], [[VP_OP_LOAD]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_OP]], ptr align 4 [[TMP19]], splat (i1 true), i32 [[TMP12]]) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll index 81a2675dc6583..a213608857728 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll @@ -30,10 +30,10 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> ; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]> -; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SMAX:%.+]]> = call llvm.vp.smax(ir<[[LD1]]>, ir<[[LD2]]>, ir, vp<[[EVL]]>) +; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SMAX:%.+]]> = call llvm.smax(ir<[[LD1]]>, ir<[[LD2]]>) ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> -; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, vp<[[SMAX]]>, vp<[[EVL]]> +; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMAX]]>, vp<[[EVL]]> ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> @@ -86,10 +86,10 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> ; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]> -; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SMIN:%.+]]> = call llvm.vp.smin(ir<[[LD1]]>, ir<[[LD2]]>, ir, vp<[[EVL]]>) +; 
IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SMIN:%.+]]> = call llvm.smin(ir<[[LD1]]>, ir<[[LD2]]>) ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> -; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, vp<[[SMIN]]>, vp<[[EVL]]> +; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMIN]]>, vp<[[EVL]]> ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> @@ -142,10 +142,10 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> ; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]> -; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[UMAX:%.+]]> = call llvm.vp.umax(ir<[[LD1]]>, ir<[[LD2]]>, ir, vp<[[EVL]]>) +; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[UMAX:%.+]]> = call llvm.umax(ir<[[LD1]]>, ir<[[LD2]]>) ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> -; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, vp<[[UMAX]]>, vp<[[EVL]]> +; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMAX]]>, vp<[[EVL]]> ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> @@ -198,10 +198,10 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> ; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]> -; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[UMIN:%.+]]> = call llvm.vp.umin(ir<[[LD1]]>, ir<[[LD2]]>, ir, vp<[[EVL]]>) +; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[UMIN:%.+]]> = call llvm.umin(ir<[[LD1]]>, ir<[[LD2]]>) ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> -; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, vp<[[UMIN]]>, vp<[[EVL]]> +; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMIN]]>, vp<[[EVL]]> ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> @@ -251,10 +251,10 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> -; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[CTLZ:%.+]]> = call llvm.vp.ctlz(ir<[[LD1]]>, ir, ir, vp<[[EVL]]>) +; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[CTLZ:%.+]]> = call llvm.ctlz(ir<[[LD1]]>, ir) ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> -; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[CTLZ]]>, vp<[[EVL]]> +; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[CTLZ]]>, vp<[[EVL]]> ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; 
IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> @@ -302,10 +302,10 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> -; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[CTTZ:%.+]]> = call llvm.vp.cttz(ir<[[LD1]]>, ir, ir, vp<[[EVL]]>) +; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[CTTZ:%.+]]> = call llvm.cttz(ir<[[LD1]]>, ir) ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> -; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[CTTZ]]>, vp<[[EVL]]> +; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[CTTZ]]>, vp<[[EVL]]> ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> @@ -353,12 +353,12 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> -; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[FPEXT:%.+]]> = call llvm.vp.fpext(ir<[[LD1]]>, ir, vp<[[EVL]]>) -; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[LRINT:%.+]]> = call llvm.vp.lrint(vp<[[FPEXT]]>, ir, vp<[[EVL]]>) -; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[TRUNC:%.+]]> = call llvm.vp.trunc(vp<[[LRINT]]>, ir, vp<[[EVL]]>) +; IF-EVL-NEXT: WIDEN-CAST ir<[[FPEXT:%.+]]> = fpext ir<[[LD1]]> to double +; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[LRINT:%.+]]> = call llvm.lrint(ir<[[FPEXT]]>) +; IF-EVL-NEXT: WIDEN-CAST ir<[[TRUNC:%.+]]> = trunc ir<[[LRINT]]> to i32 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> -; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[TRUNC]]>, vp<[[EVL]]> +; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]> ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> @@ -408,12 +408,12 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> -; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[FPEXT:%.+]]> = call llvm.vp.fpext(ir<[[LD1]]>, ir, vp<[[EVL]]>) -; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[LLRINT:%.+]]> = call llvm.vp.llrint(vp<[[FPEXT]]>, ir, vp<[[EVL]]>) -; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[TRUNC:%.+]]> = call llvm.vp.trunc(vp<[[LLRINT]]>, ir, vp<[[EVL]]>) +; IF-EVL-NEXT: WIDEN-CAST ir<[[FPEXT:%.+]]> = fpext ir<[[LD1]]> to double +; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[LLRINT:%.+]]> = call llvm.llrint(ir<[[FPEXT]]>) +; IF-EVL-NEXT: WIDEN-CAST ir<[[TRUNC:%.+]]> = trunc ir<[[LLRINT]]> to i32 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> -; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[TRUNC]]>, vp<[[EVL]]> +; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]> ; IF-EVL-NEXT: 
SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> @@ -463,10 +463,10 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> -; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[ABS:%.+]]> = call llvm.vp.abs(ir<[[LD1]]>, ir, ir, vp<[[EVL]]>) +; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[ABS:%.+]]> = call llvm.abs(ir<[[LD1]]>, ir) ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> -; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[ABS]]>, vp<[[EVL]]> +; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ABS]]>, vp<[[EVL]]> ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll index 40cf02c4663fe..b8f20aa670b5c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll @@ -26,10 +26,10 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> -; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SEXT:%.+]]> = call llvm.vp.sext(ir<[[LD1]]>, ir, vp<[[EVL]]>) +; IF-EVL-NEXT: WIDEN-CAST ir<[[SEXT:%.+]]> = sext ir<[[LD1]]> to i64 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> -; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[SEXT]]>, vp<[[EVL]]> +; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[SEXT]]>, vp<[[EVL]]> ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> @@ -79,10 +79,10 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> -; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[ZEXT:%.+]]> = call llvm.vp.zext(ir<[[LD1]]>, ir, vp<[[EVL]]>) +; IF-EVL-NEXT: WIDEN-CAST ir<[[ZEXT:%.+]]> = zext ir<[[LD1]]> to i64 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> -; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[ZEXT]]>, vp<[[EVL]]> +; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ZEXT]]>, vp<[[EVL]]> ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> @@ -130,10 +130,10 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) { ; 
IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
 ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[TRUNC:%.+]]> = call llvm.vp.trunc(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
+; IF-EVL-NEXT: WIDEN-CAST ir<[[TRUNC:%.+]]> = trunc ir<[[LD1]]> to i16
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[TRUNC]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
 ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -181,10 +181,10 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
 ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[FPEXT:%.+]]> = call llvm.vp.fpext(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
+; IF-EVL-NEXT: WIDEN-CAST ir<[[FPEXT:%.+]]> = fpext ir<[[LD1]]> to double
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[FPEXT]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPEXT]]>, vp<[[EVL]]>
 ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -232,10 +232,10 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
 ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[FPTRUNC:%.+]]> = call llvm.vp.fptrunc(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
+; IF-EVL-NEXT: WIDEN-CAST ir<[[FPTRUNC:%.+]]> = fptrunc ir<[[LD1]]> to float
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[FPTRUNC]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTRUNC]]>, vp<[[EVL]]>
 ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -283,10 +283,10 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
 ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SITOFP:%.+]]> = call llvm.vp.sitofp(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
+; IF-EVL-NEXT: WIDEN-CAST ir<[[SITOFP:%.+]]> = sitofp ir<[[LD1]]> to float
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[SITOFP]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[SITOFP]]>, vp<[[EVL]]>
 ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -334,10 +334,10 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
 ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[UITOFP:%.+]]> = call llvm.vp.uitofp(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
+; IF-EVL-NEXT: WIDEN-CAST ir<[[UITOFP:%.+]]> = uitofp ir<[[LD1]]> to float
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[UITOFP]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[UITOFP]]>, vp<[[EVL]]>
 ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -385,10 +385,10 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
 ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[FPTOSI:%.+]]> = call llvm.vp.fptosi(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
+; IF-EVL-NEXT: WIDEN-CAST ir<[[FPTOSI:%.+]]> = fptosi ir<[[LD1]]> to i32
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[FPTOSI]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTOSI]]>, vp<[[EVL]]>
 ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -436,10 +436,10 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
 ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[FPTOUI:%.+]]> = call llvm.vp.fptoui(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
+; IF-EVL-NEXT: WIDEN-CAST ir<[[FPTOUI:%.+]]> = fptoui ir<[[LD1]]> to i32
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[FPTOUI]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTOUI]]>, vp<[[EVL]]>
 ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -487,10 +487,10 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
 ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[INTTOPTR:%.+]]> = call llvm.vp.inttoptr(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
+; IF-EVL-NEXT: WIDEN-CAST ir<[[INTTOPTR:%.+]]> = inttoptr ir<[[LD1]]> to ptr
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[INTTOPTR]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[INTTOPTR]]>, vp<[[EVL]]>
 ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
index 0daf15d04af35..3594b3f047363 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
@@ -49,7 +49,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-OUTLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-OUTLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
 ; IF-EVL-OUTLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-OUTLOOP-NEXT: WIDEN ir<[[ADD:%.+]]> = vp.add ir<[[LD1]]>, ir<[[RDX_PHI]]>, vp<[[EVL]]>
+; IF-EVL-OUTLOOP-NEXT: WIDEN ir<[[ADD:%.+]]> = add ir<[[LD1]]>, ir<[[RDX_PHI]]>
 ; IF-EVL-OUTLOOP-NEXT: WIDEN-INTRINSIC vp<[[RDX_SELECT]]> = call llvm.vp.merge(ir<true>, ir<[[ADD]]>, ir<[[RDX_PHI]]>, vp<[[EVL]]>)
 ; IF-EVL-OUTLOOP-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
index e64ea387d4582..4cef3f029e20f 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
@@ -35,7 +35,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
 ; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = vp.add nsw ir<[[LD2]]>, ir<[[LD1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = add nsw ir<[[LD2]]>, ir<[[LD1]]>
 ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
 ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
index 90de5b6bfd1d3..67be80393d829 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
@@ -44,9 +44,9 @@
  ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
  ; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
  ; IF-EVL-NEXT: WIDEN ir<[[CMP:%.+]]> = icmp sgt ir<[[LD1]]>, ir<[[LD2]]>
- ; IF-EVL-NEXT: WIDEN ir<[[SUB:%.+]]> = vp.sub ir<0>, ir<[[LD2]]>, vp<[[EVL]]>
+ ; IF-EVL-NEXT: WIDEN ir<[[SUB:%.+]]> = sub ir<0>, ir<[[LD2]]>
  ; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SELECT:%.+]]> = call llvm.vp.select(ir<[[CMP]]>, ir<[[LD2]]>, ir<[[SUB]]>, vp<[[EVL]]>)
- ; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = vp.add vp<[[SELECT]]>, ir<[[LD1]]>, vp<[[EVL]]>
+ ; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = add vp<[[SELECT]]>, ir<[[LD1]]>
  ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
  ; IF-EVL-NEXT: vp<[[PTR3:%.+]]> = vector-pointer ir<[[GEP3]]>
  ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]>

From 77ab27466f9f08e768144e9d2f64325fe7e136da Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Tue, 18 Feb 2025 13:04:03 +0800
Subject: [PATCH 3/4] Add llc test for codegen of EVL tail folded loop

---
 .../RISCV/rvv/vl-opt-evl-tail-folding.ll      | 59 +++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vl-opt-evl-tail-folding.ll

diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-evl-tail-folding.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-evl-tail-folding.ll
new file mode 100644
index 0000000000000..48f93c8323cfd
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-evl-tail-folding.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
+
+; Check that non-VP instructions inside EVL tail folded loops from the loop
+; vectorizer can have their vl reduced.
+define void @evl_tail_folded(ptr %p, ptr %q) {
+; CHECK-LABEL: evl_tail_folded:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a2, 0
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    srli a3, a3, 2
+; CHECK-NEXT:    addi a4, a3, 1023
+; CHECK-NEXT:    neg a5, a3
+; CHECK-NEXT:    and a4, a4, a5
+; CHECK-NEXT:    li a5, 1024
+; CHECK-NEXT:  .LBB0_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    sub a6, a5, a2
+; CHECK-NEXT:    slli a7, a2, 3
+; CHECK-NEXT:    vsetvli a6, a6, e64, m2, ta, ma
+; CHECK-NEXT:    add t0, a0, a7
+; CHECK-NEXT:    vle64.v v8, (t0)
+; CHECK-NEXT:    sub a4, a4, a3
+; CHECK-NEXT:    add a7, a1, a7
+; CHECK-NEXT:    vadd.vi v8, v8, 1
+; CHECK-NEXT:    vse64.v v8, (a7)
+; CHECK-NEXT:    add a2, a2, a6
+; CHECK-NEXT:    bnez a4, .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %exit
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.vscale.i64()
+  %1 = shl i64 %0, 1
+  %n.rnd.up = add i64 %1, 1023
+  %n.mod.vf = urem i64 %n.rnd.up, %1
+  %n.vec = sub i64 %n.rnd.up, %n.mod.vf
+  %2 = tail call i64 @llvm.vscale.i64()
+  %3 = shl i64 %2, 1
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %evl.based.iv = phi i64 [ 0, %entry ], [ %index.evl.next, %vector.body ]
+  %avl = sub i64 1024, %evl.based.iv
+  %4 = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 2, i1 true)
+  %5 = getelementptr i64, ptr %p, i64 %evl.based.iv
+  %vp.op.load = tail call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %5, <vscale x 2 x i1> splat (i1 true), i32 %4)
+  %6 = add <vscale x 2 x i64> %vp.op.load, splat (i64 1)
+  %7 = getelementptr i64, ptr %q, i64 %evl.based.iv
+  tail call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> %6, ptr %7, <vscale x 2 x i1> splat (i1 true), i32 %4)
+  %8 = zext i32 %4 to i64
+  %index.evl.next = add i64 %evl.based.iv, %8
+  %index.next = add i64 %index, %3
+  %9 = icmp eq i64 %index.next, %n.vec
+  br i1 %9, label %exit, label %vector.body
+
+exit:
+  ret void
+}
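
The test above pins down the end-to-end behaviour this series relies on: the plain `add` is emitted as a non-VP `vadd.vi`, yet it still runs under the EVL that the single `vsetvli` established for the surrounding `vle64.v`/`vse64.v`, i.e. the backend was able to reduce its vl. The sketch below illustrates the demand-driven legality rule behind such a reduction. It is a simplified stand-in, not the actual RISC-V VL optimizer code, and the type and helper names (`InstrInfo`, `reducedVL`) are invented for the example.

    #include <optional>
    #include <vector>

    // Invented stand-in for a vector machine instruction; not an LLVM type.
    struct InstrInfo {
      bool HasExplicitEVL;            // VL already comes from get.vector.length
      unsigned VL;                    // VL the instruction currently runs at
      std::vector<InstrInfo *> Users; // instructions that read its result
    };

    // A non-VP instruction (like the vadd.vi above) can have its VL reduced
    // to EVL when every user of its result only reads lanes below EVL.
    std::optional<unsigned> reducedVL(const InstrInfo &MI, unsigned EVL) {
      if (MI.HasExplicitEVL)
        return std::nullopt; // already runs at the reduced VL
      for (const InstrInfo *User : MI.Users)
        if (User->VL > EVL) // a user reads lanes past EVL; shrinking is unsafe
          return std::nullopt;
      return EVL;
    }

The `vadd.vi` in the loop body satisfies this rule: its only user is the `vse64.v`, whose VL is exactly the EVL returned by `@llvm.experimental.get.vector.length`.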

From f9ce2400c7c392699d3fd88674d12132a8b16025 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Tue, 18 Feb 2025 22:06:52 +0800
Subject: [PATCH 4/4] Remove VPWidenEVLRecipe pattern match check

---
 llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index ebc82c01467cf..8c11d93734667 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -155,14 +155,6 @@ struct Recipe_match {
     if ((!matchRecipeAndOpcode<RecipeTys>(R) && ...))
       return false;
 
-    if (!(std::is_same_v<VPWidenEVLRecipe, RecipeTys> || ...) &&
-        isa<VPWidenEVLRecipe>(R)) {
-      // Don't match VPWidenEVLRecipe if it is not explicitly part of RecipeTys.
-      // Otherwise we might match it unexpectedly when trying to match
-      // VPWidenRecipe, of which VPWidenEVLRecipe is a subclass.
-      return false;
-    }
-
     assert(R->getNumOperands() == std::tuple_size<Ops_t>::value &&
            "recipe with matched opcode has the expected number of operands");
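
For context on why the deleted guard existed at all: Recipe_match checks a recipe against each type in RecipeTys with a fold expression, and because VPWidenEVLRecipe derived from VPWidenRecipe, an isa/dyn_cast-style test against the base class would also accept the EVL variant unless it was rejected explicitly. The following self-contained illustration shows the subclass pitfall; the class names are stand-ins, and dynamic_cast stands in for LLVM's dyn_cast.

    #include <iostream>

    // Stand-ins for the VPlan hierarchy; not the real LLVM classes.
    struct VPRecipeStub { virtual ~VPRecipeStub() = default; };
    struct WidenStub : VPRecipeStub {};
    struct WidenEVLStub : WidenStub {}; // subclass, as VPWidenEVLRecipe was

    // A matcher written against the base widen recipe, as Recipe_match was:
    bool matchesWiden(const VPRecipeStub *R) {
      // Succeeds for subclasses too, which is exactly how a pattern aimed at
      // VPWidenRecipe could fire on a VPWidenEVLRecipe unexpectedly.
      return dynamic_cast<const WidenStub *>(R) != nullptr;
    }

    int main() {
      WidenEVLStub EVLRecipe;
      std::cout << matchesWiden(&EVLRecipe) << '\n'; // prints 1: it matches
    }

With the earlier patches in this series widening these operations as plain recipes (the `vp.add` to `add` and `llvm.vp.trunc` to `trunc` test updates above) rather than through an EVL subclass, no type remains that can alias the base-class match, so the guard appears to be dead code, which is what this final patch removes.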