diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/fround.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/fround.ll
new file mode 100644
index 0000000000000..9206f529cbfd3
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/fround.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv32 -mattr=+m,+v | FileCheck %s
+; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+m,+v | FileCheck %s
+; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv32 -mattr=+v,+experimental-zvbb | FileCheck %s
+; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+experimental-zvbb | FileCheck %s
+
+define <4 x float> @rint_v4f32(ptr %a) {
+; CHECK-LABEL: define <4 x float> @rint_v4f32(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP0]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+entry:
+  %0 = load <4 x float>, ptr %a
+  %vecext = extractelement <4 x float> %0, i64 0
+  %1 = call float @llvm.rint.f32(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i64 0
+  %vecext.1 = extractelement <4 x float> %0, i64 1
+  %2 = call float @llvm.rint.f32(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i64 1
+  %vecext.2 = extractelement <4 x float> %0, i64 2
+  %3 = call float @llvm.rint.f32(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i64 2
+  %vecext.3 = extractelement <4 x float> %0, i64 3
+  %4 = call float @llvm.rint.f32(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i64 3
+  ret <4 x float> %vecins.3
+}
+
+define <4 x i64> @lrint_v4i64f32(ptr %a) {
+; CHECK-LABEL: define <4 x i64> @lrint_v4i64f32(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x i64> undef, i64 [[TMP1]], i64 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x i64> [[VECINS]], i64 [[TMP2]], i64 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x i64> [[VECINS_1]], i64 [[TMP3]], i64 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x i64> [[VECINS_2]], i64 [[TMP4]], i64 3
+; CHECK-NEXT:    ret <4 x i64> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, ptr %a
+  %vecext = extractelement <4 x float> %0, i64 0
+  %1 = call i64 @llvm.lrint.i64.f32(float %vecext)
+  %vecins = insertelement <4 x i64> undef, i64 %1, i64 0
+  %vecext.1 = extractelement <4 x float> %0, i64 1
+  %2 = call i64 @llvm.lrint.i64.f32(float %vecext.1)
+  %vecins.1 = insertelement <4 x i64> %vecins, i64 %2, i64 1
+  %vecext.2 = extractelement <4 x float> %0, i64 2
+  %3 = call i64 @llvm.lrint.i64.f32(float %vecext.2)
+  %vecins.2 = insertelement <4 x i64> %vecins.1, i64 %3, i64 2
+  %vecext.3 = extractelement <4 x float> %0, i64 3
+  %4 = call i64 @llvm.lrint.i64.f32(float %vecext.3)
+  %vecins.3 = insertelement <4 x i64> %vecins.2, i64 %4, i64 3
+  ret <4 x i64> %vecins.3
+}
+
+declare float @llvm.rint.f32(float)
+declare i64 @llvm.lrint.i64.f32(float)