-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[RISCV] Handle zvfhmin/zvfbfmin in lowerVECTOR_SHUFFLEAsVSlide1 #114925
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Handle zvfhmin/zvfbfmin in lowerVECTOR_SHUFFLEAsVSlide1 #114925
Conversation
Most of lowerVECTOR_SHUFFLE lowers to nodes that work on f16 and bf16 vectors, with the exception of the vslide1 lowering which tries to emit vfslide1s. Handle this case as an integer vslide1 via fmv.x.h. Fixes llvm#114893
@llvm/pr-subscribers-backend-risc-v Author: Luke Lau (lukel97) Changes: Most of lowerVECTOR_SHUFFLE lowers to nodes that work on f16 and bf16 vectors, with the exception of the vslide1 lowering which tries to emit vfslide1s. Handle this case as an integer vslide1 via fmv.x.h. Fixes #114893 Full diff: https://github.com/llvm/llvm-project/pull/114925.diff 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d2d03d4572dac8..96490cdec6c69d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4817,6 +4817,24 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+ // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
+ // vslide1{down,up}.vx instead.
+ if (VT.getVectorElementType() == MVT::bf16 ||
+ (VT.getVectorElementType() == MVT::f16 &&
+ !Subtarget.hasVInstructionsF16())) {
+ MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
+ Splat =
+ DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
+ V2 = DAG.getBitcast(
+ IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
+ SDValue Vec = DAG.getNode(
+ IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
+ IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
+ Vec = DAG.getBitcast(ContainerVT, Vec);
+ return convertFromScalableVector(VT, Vec, DAG, Subtarget);
+ }
+
auto OpCode = IsVSlidedown ?
(VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
(VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll
index f531ff3a835e45..563b90dfa47efd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -131,23 +133,61 @@ define <4 x i64> @vslide1down_4xi64(<4 x i64> %v, i64 %b) {
ret <4 x i64> %v1
}
-define <2 x half> @vslide1down_2xf16(<2 x half> %v, half %b) {
-; CHECK-LABEL: vslide1down_2xf16:
+define <2 x bfloat> @vslide1down_2xbf16(<2 x bfloat> %v, bfloat %b) {
+; CHECK-LABEL: vslide1down_2xbf16:
; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %vb = insertelement <2 x bfloat> poison, bfloat %b, i64 0
+ %v1 = shufflevector <2 x bfloat> %v, <2 x bfloat> %vb, <2 x i32> <i32 1, i32 2>
+ ret <2 x bfloat> %v1
+}
+
+define <4 x bfloat> @vslide1down_4xbf16(<4 x bfloat> %v, bfloat %b) {
+; CHECK-LABEL: vslide1down_4xbf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
; CHECK-NEXT: ret
+ %vb = insertelement <4 x bfloat> poison, bfloat %b, i64 0
+ %v1 = shufflevector <4 x bfloat> %v, <4 x bfloat> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+ ret <4 x bfloat> %v1
+}
+
+define <2 x half> @vslide1down_2xf16(<2 x half> %v, half %b) {
+; ZVFH-LABEL: vslide1down_2xf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vfslide1down.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vslide1down_2xf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
%vb = insertelement <2 x half> poison, half %b, i64 0
%v1 = shufflevector <2 x half> %v, <2 x half> %vb, <2 x i32> <i32 1, i32 2>
ret <2 x half> %v1
}
define <4 x half> @vslide1down_4xf16(<4 x half> %v, half %b) {
-; CHECK-LABEL: vslide1down_4xf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vslide1down_4xf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vfslide1down.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vslide1down_4xf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
%vb = insertelement <4 x half> poison, half %b, i64 0
%v1 = shufflevector <4 x half> %v, <4 x half> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x half> %v1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
index b3390b6eeeccdb..0f6d68dc1a6c76 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -140,25 +142,67 @@ define <4 x i64> @vslide1up_4xi64(<4 x i64> %v, i64 %b) {
ret <4 x i64> %v1
}
-define <2 x half> @vslide1up_2xf16(<2 x half> %v, half %b) {
-; CHECK-LABEL: vslide1up_2xf16:
+define <2 x bfloat> @vslide1up_2xbf16(<2 x bfloat> %v, bfloat %b) {
+; CHECK-LABEL: vslide1up_2xbf16:
; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vfslide1up.vf v9, v8, fa0
+; CHECK-NEXT: vslide1up.vx v9, v8, a0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
- %vb = insertelement <2 x half> poison, half %b, i64 0
- %v1 = shufflevector <2 x half> %v, <2 x half> %vb, <2 x i32> <i32 2, i32 0>
- ret <2 x half> %v1
+ %vb = insertelement <2 x bfloat> poison, bfloat %b, i64 0
+ %v1 = shufflevector <2 x bfloat> %v, <2 x bfloat> %vb, <2 x i32> <i32 2, i32 0>
+ ret <2 x bfloat> %v1
}
-define <4 x half> @vslide1up_4xf16(<4 x half> %v, half %b) {
-; CHECK-LABEL: vslide1up_4xf16:
+define <4 x bfloat> @vslide1up_4xbf16(<4 x bfloat> %v, bfloat %b) {
+; CHECK-LABEL: vslide1up_4xbf16:
; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vfslide1up.vf v9, v8, fa0
+; CHECK-NEXT: vslide1up.vx v9, v8, a0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
+ %vb = insertelement <4 x bfloat> poison, bfloat %b, i64 0
+ %v1 = shufflevector <4 x bfloat> %v, <4 x bfloat> %vb, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
+ ret <4 x bfloat> %v1
+}
+
+define <2 x half> @vslide1up_2xf16(<2 x half> %v, half %b) {
+; ZVFH-LABEL: vslide1up_2xf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vfslide1up.vf v9, v8, fa0
+; ZVFH-NEXT: vmv1r.v v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vslide1up_2xf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vslide1up.vx v9, v8, a0
+; ZVFHMIN-NEXT: vmv1r.v v8, v9
+; ZVFHMIN-NEXT: ret
+ %vb = insertelement <2 x half> poison, half %b, i64 0
+ %v1 = shufflevector <2 x half> %v, <2 x half> %vb, <2 x i32> <i32 2, i32 0>
+ ret <2 x half> %v1
+}
+
+define <4 x half> @vslide1up_4xf16(<4 x half> %v, half %b) {
+; ZVFH-LABEL: vslide1up_4xf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vfslide1up.vf v9, v8, fa0
+; ZVFH-NEXT: vmv1r.v v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vslide1up_4xf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vslide1up.vx v9, v8, a0
+; ZVFHMIN-NEXT: vmv1r.v v8, v9
+; ZVFHMIN-NEXT: ret
%vb = insertelement <4 x half> poison, half %b, i64 0
%v1 = shufflevector <4 x half> %v, <4 x half> %vb, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
ret <4 x half> %v1
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
[RISCV] Handle zvfhmin/zvfbfmin in lowerVECTOR_SHUFFLEAsVSlide1 (#114925) Most of lowerVECTOR_SHUFFLE lowers to nodes that work on f16 and bf16 vectors, with the exception of the vslide1 lowering which tries to emit vfslide1s. Handle this case as an integer vslide1 via fmv.x.h. Fixes llvm#114893
Most of lowerVECTOR_SHUFFLE lowers to nodes that work on f16 and bf16 vectors, with the exception of the vslide1 lowering which tries to emit vfslide1s. Handle this case as an integer vslide1 via fmv.x.h.
Fixes #114893