Skip to content

Commit b3a5540

Browse files
lukel97 authored and PhilippRados committed
[RISCV] Handle zvfhmin/zvfbfmin in lowerVECTOR_SHUFFLEAsVSlide1 (llvm#114925)
Most of lowerVECTOR_SHUFFLE lowers to nodes that work on f16 and bf16 vectors, with the exception of the vslide1 lowering which tries to emit vfslide1s. Handle this case as an integer vslide1 via fmv.x.h. Fixes llvm#114893
1 parent d3d6861 commit b3a5540

File tree

3 files changed

+123
-21
lines changed

3 files changed

+123
-21
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4817,6 +4817,24 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
48174817

48184818
MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
48194819
auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4820+
4821+
// zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
4822+
// vslide1{down,up}.vx instead.
4823+
if (VT.getVectorElementType() == MVT::bf16 ||
4824+
(VT.getVectorElementType() == MVT::f16 &&
4825+
!Subtarget.hasVInstructionsF16())) {
4826+
MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
4827+
Splat =
4828+
DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
4829+
V2 = DAG.getBitcast(
4830+
IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
4831+
SDValue Vec = DAG.getNode(
4832+
IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
4833+
IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
4834+
Vec = DAG.getBitcast(ContainerVT, Vec);
4835+
return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4836+
}
4837+
48204838
auto OpCode = IsVSlidedown ?
48214839
(VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
48224840
(VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll

Lines changed: 50 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2-
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
3-
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
2+
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32
3+
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64
4+
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32
5+
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64
46

57
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
68

@@ -131,23 +133,61 @@ define <4 x i64> @vslide1down_4xi64(<4 x i64> %v, i64 %b) {
131133
ret <4 x i64> %v1
132134
}
133135

134-
define <2 x half> @vslide1down_2xf16(<2 x half> %v, half %b) {
135-
; CHECK-LABEL: vslide1down_2xf16:
136+
define <2 x bfloat> @vslide1down_2xbf16(<2 x bfloat> %v, bfloat %b) {
137+
; CHECK-LABEL: vslide1down_2xbf16:
136138
; CHECK: # %bb.0:
139+
; CHECK-NEXT: fmv.x.h a0, fa0
137140
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
138-
; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
141+
; CHECK-NEXT: vslide1down.vx v8, v8, a0
142+
; CHECK-NEXT: ret
143+
%vb = insertelement <2 x bfloat> poison, bfloat %b, i64 0
144+
%v1 = shufflevector <2 x bfloat> %v, <2 x bfloat> %vb, <2 x i32> <i32 1, i32 2>
145+
ret <2 x bfloat> %v1
146+
}
147+
148+
define <4 x bfloat> @vslide1down_4xbf16(<4 x bfloat> %v, bfloat %b) {
149+
; CHECK-LABEL: vslide1down_4xbf16:
150+
; CHECK: # %bb.0:
151+
; CHECK-NEXT: fmv.x.h a0, fa0
152+
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
153+
; CHECK-NEXT: vslide1down.vx v8, v8, a0
139154
; CHECK-NEXT: ret
155+
%vb = insertelement <4 x bfloat> poison, bfloat %b, i64 0
156+
%v1 = shufflevector <4 x bfloat> %v, <4 x bfloat> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
157+
ret <4 x bfloat> %v1
158+
}
159+
160+
define <2 x half> @vslide1down_2xf16(<2 x half> %v, half %b) {
161+
; ZVFH-LABEL: vslide1down_2xf16:
162+
; ZVFH: # %bb.0:
163+
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
164+
; ZVFH-NEXT: vfslide1down.vf v8, v8, fa0
165+
; ZVFH-NEXT: ret
166+
;
167+
; ZVFHMIN-LABEL: vslide1down_2xf16:
168+
; ZVFHMIN: # %bb.0:
169+
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
170+
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
171+
; ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
172+
; ZVFHMIN-NEXT: ret
140173
%vb = insertelement <2 x half> poison, half %b, i64 0
141174
%v1 = shufflevector <2 x half> %v, <2 x half> %vb, <2 x i32> <i32 1, i32 2>
142175
ret <2 x half> %v1
143176
}
144177

145178
define <4 x half> @vslide1down_4xf16(<4 x half> %v, half %b) {
146-
; CHECK-LABEL: vslide1down_4xf16:
147-
; CHECK: # %bb.0:
148-
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
149-
; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
150-
; CHECK-NEXT: ret
179+
; ZVFH-LABEL: vslide1down_4xf16:
180+
; ZVFH: # %bb.0:
181+
; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
182+
; ZVFH-NEXT: vfslide1down.vf v8, v8, fa0
183+
; ZVFH-NEXT: ret
184+
;
185+
; ZVFHMIN-LABEL: vslide1down_4xf16:
186+
; ZVFHMIN: # %bb.0:
187+
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
188+
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
189+
; ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
190+
; ZVFHMIN-NEXT: ret
151191
%vb = insertelement <4 x half> poison, half %b, i64 0
152192
%v1 = shufflevector <4 x half> %v, <4 x half> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
153193
ret <4 x half> %v1

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll

Lines changed: 55 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2-
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
3-
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
2+
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32
3+
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64
4+
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32
5+
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64
46

57
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
68

@@ -140,25 +142,67 @@ define <4 x i64> @vslide1up_4xi64(<4 x i64> %v, i64 %b) {
140142
ret <4 x i64> %v1
141143
}
142144

143-
define <2 x half> @vslide1up_2xf16(<2 x half> %v, half %b) {
144-
; CHECK-LABEL: vslide1up_2xf16:
145+
define <2 x bfloat> @vslide1up_2xbf16(<2 x bfloat> %v, bfloat %b) {
146+
; CHECK-LABEL: vslide1up_2xbf16:
145147
; CHECK: # %bb.0:
148+
; CHECK-NEXT: fmv.x.h a0, fa0
146149
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
147-
; CHECK-NEXT: vfslide1up.vf v9, v8, fa0
150+
; CHECK-NEXT: vslide1up.vx v9, v8, a0
148151
; CHECK-NEXT: vmv1r.v v8, v9
149152
; CHECK-NEXT: ret
150-
%vb = insertelement <2 x half> poison, half %b, i64 0
151-
%v1 = shufflevector <2 x half> %v, <2 x half> %vb, <2 x i32> <i32 2, i32 0>
152-
ret <2 x half> %v1
153+
%vb = insertelement <2 x bfloat> poison, bfloat %b, i64 0
154+
%v1 = shufflevector <2 x bfloat> %v, <2 x bfloat> %vb, <2 x i32> <i32 2, i32 0>
155+
ret <2 x bfloat> %v1
153156
}
154157

155-
define <4 x half> @vslide1up_4xf16(<4 x half> %v, half %b) {
156-
; CHECK-LABEL: vslide1up_4xf16:
158+
define <4 x bfloat> @vslide1up_4xbf16(<4 x bfloat> %v, bfloat %b) {
159+
; CHECK-LABEL: vslide1up_4xbf16:
157160
; CHECK: # %bb.0:
161+
; CHECK-NEXT: fmv.x.h a0, fa0
158162
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
159-
; CHECK-NEXT: vfslide1up.vf v9, v8, fa0
163+
; CHECK-NEXT: vslide1up.vx v9, v8, a0
160164
; CHECK-NEXT: vmv1r.v v8, v9
161165
; CHECK-NEXT: ret
166+
%vb = insertelement <4 x bfloat> poison, bfloat %b, i64 0
167+
%v1 = shufflevector <4 x bfloat> %v, <4 x bfloat> %vb, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
168+
ret <4 x bfloat> %v1
169+
}
170+
171+
define <2 x half> @vslide1up_2xf16(<2 x half> %v, half %b) {
172+
; ZVFH-LABEL: vslide1up_2xf16:
173+
; ZVFH: # %bb.0:
174+
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
175+
; ZVFH-NEXT: vfslide1up.vf v9, v8, fa0
176+
; ZVFH-NEXT: vmv1r.v v8, v9
177+
; ZVFH-NEXT: ret
178+
;
179+
; ZVFHMIN-LABEL: vslide1up_2xf16:
180+
; ZVFHMIN: # %bb.0:
181+
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
182+
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
183+
; ZVFHMIN-NEXT: vslide1up.vx v9, v8, a0
184+
; ZVFHMIN-NEXT: vmv1r.v v8, v9
185+
; ZVFHMIN-NEXT: ret
186+
%vb = insertelement <2 x half> poison, half %b, i64 0
187+
%v1 = shufflevector <2 x half> %v, <2 x half> %vb, <2 x i32> <i32 2, i32 0>
188+
ret <2 x half> %v1
189+
}
190+
191+
define <4 x half> @vslide1up_4xf16(<4 x half> %v, half %b) {
192+
; ZVFH-LABEL: vslide1up_4xf16:
193+
; ZVFH: # %bb.0:
194+
; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
195+
; ZVFH-NEXT: vfslide1up.vf v9, v8, fa0
196+
; ZVFH-NEXT: vmv1r.v v8, v9
197+
; ZVFH-NEXT: ret
198+
;
199+
; ZVFHMIN-LABEL: vslide1up_4xf16:
200+
; ZVFHMIN: # %bb.0:
201+
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
202+
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
203+
; ZVFHMIN-NEXT: vslide1up.vx v9, v8, a0
204+
; ZVFHMIN-NEXT: vmv1r.v v8, v9
205+
; ZVFHMIN-NEXT: ret
162206
%vb = insertelement <4 x half> poison, half %b, i64 0
163207
%v1 = shufflevector <4 x half> %v, <4 x half> %vb, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
164208
ret <4 x half> %v1

0 commit comments

Comments
 (0)