Skip to content

Commit c67653f

Browse files
authored
[RISCV] Support vXf16 vector_shuffle with Zvfhmin. (#97491)
We can shuffle vXf16 vectors just like vXi16 vectors; no FP instructions are needed. Update the predicates for the vrgather and vslide patterns to check only the predicates of the equivalent integer type. If we use the FP type, the predicates require Zvfh, which blocks these patterns when only Zvfhmin is available. These are probably not the only patterns that need to be fixed, but the test from the bug report no longer crashes. Fixes #97477.
1 parent 30df629 commit c67653f

File tree

3 files changed

+63
-7
lines changed

3 files changed

+63
-7
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1319,7 +1319,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
13191319
ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
13201320
VT, Custom);
13211321
setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
1322-
ISD::EXTRACT_SUBVECTOR},
1322+
ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_SHUFFLE},
13231323
VT, Custom);
13241324
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
13251325
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
@@ -5040,6 +5040,14 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
50405040
return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
50415041
}
50425042

5043+
MVT SplatVT = ContainerVT;
5044+
5045+
// If we don't have Zfh, we need to use an integer scalar load.
5046+
if (SVT == MVT::f16 && !Subtarget.hasStdExtZfh()) {
5047+
SVT = MVT::i16;
5048+
SplatVT = ContainerVT.changeVectorElementType(SVT);
5049+
}
5050+
50435051
// Otherwise use a scalar load and splat. This will give the best
50445052
// opportunity to fold a splat into the operation. ISel can turn it into
50455053
// the x0 strided load if we aren't able to fold away the select.
@@ -5055,10 +5063,11 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
50555063
Ld->getMemOperand()->getFlags());
50565064
DAG.makeEquivalentMemoryOrdering(Ld, V);
50575065

5058-
unsigned Opc =
5059-
VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
5066+
unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5067+
: RISCVISD::VMV_V_X_VL;
50605068
SDValue Splat =
5061-
DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
5069+
DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5070+
Splat = DAG.getBitcast(ContainerVT, Splat);
50625071
return convertFromScalableVector(VT, Splat, DAG, Subtarget);
50635072
}
50645073

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2117,7 +2117,8 @@ multiclass VPatWidenFPMulAccVL_VV_VF_RM<SDNode vop, string instruction_name> {
21172117

21182118
multiclass VPatSlideVL_VX_VI<SDNode vop, string instruction_name> {
21192119
foreach vti = AllVectors in {
2120-
let Predicates = GetVTypePredicates<vti>.Predicates in {
2120+
defvar ivti = GetIntVTypeInfo<vti>.Vti;
2121+
let Predicates = GetVTypePredicates<ivti>.Predicates in {
21212122
def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rd),
21222123
(vti.Vector vti.RegClass:$rs1),
21232124
uimm5:$rs2, (vti.Mask V0),
@@ -3001,8 +3002,7 @@ foreach vti = AllFloatVectors in {
30013002
(vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>;
30023003
}
30033004
defvar ivti = GetIntVTypeInfo<vti>.Vti;
3004-
let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
3005-
GetVTypePredicates<ivti>.Predicates) in {
3005+
let Predicates = GetVTypePredicates<ivti>.Predicates in {
30063006
def : Pat<(vti.Vector
30073007
(riscv_vrgather_vv_vl vti.RegClass:$rs2,
30083008
(ivti.Vector vti.RegClass:$rs1),

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
33
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
4+
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
5+
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
46

57
define <4 x half> @shuffle_v4f16(<4 x half> %x, <4 x half> %y) {
68
; CHECK-LABEL: shuffle_v4f16:
@@ -262,6 +264,51 @@ define <8 x double> @splice_binary2(<8 x double> %x, <8 x double> %y) {
262264
%s = shufflevector <8 x double> %x, <8 x double> %y, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
263265
ret <8 x double> %s
264266
}
267+
268+
define <4 x half> @vrgather_permute_shuffle_vu_v4f16(<4 x half> %x) {
269+
; CHECK-LABEL: vrgather_permute_shuffle_vu_v4f16:
270+
; CHECK: # %bb.0:
271+
; CHECK-NEXT: lui a0, 4096
272+
; CHECK-NEXT: addi a0, a0, 513
273+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
274+
; CHECK-NEXT: vmv.s.x v9, a0
275+
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
276+
; CHECK-NEXT: vsext.vf2 v10, v9
277+
; CHECK-NEXT: vrgather.vv v9, v8, v10
278+
; CHECK-NEXT: vmv1r.v v8, v9
279+
; CHECK-NEXT: ret
280+
%s = shufflevector <4 x half> %x, <4 x half> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
281+
ret <4 x half> %s
282+
}
283+
284+
define <4 x half> @vrgather_shuffle_vv_v4f16(<4 x half> %x, <4 x half> %y) {
285+
; CHECK-LABEL: vrgather_shuffle_vv_v4f16:
286+
; CHECK: # %bb.0:
287+
; CHECK-NEXT: lui a0, %hi(.LCPI21_0)
288+
; CHECK-NEXT: addi a0, a0, %lo(.LCPI21_0)
289+
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
290+
; CHECK-NEXT: vle16.v v11, (a0)
291+
; CHECK-NEXT: vmv.v.i v0, 8
292+
; CHECK-NEXT: vrgather.vv v10, v8, v11
293+
; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t
294+
; CHECK-NEXT: vmv1r.v v8, v10
295+
; CHECK-NEXT: ret
296+
%s = shufflevector <4 x half> %x, <4 x half> %y, <4 x i32> <i32 1, i32 2, i32 0, i32 5>
297+
ret <4 x half> %s
298+
}
299+
300+
define <4 x half> @vrgather_shuffle_vx_v4f16_load(ptr %p) {
301+
; CHECK-LABEL: vrgather_shuffle_vx_v4f16_load:
302+
; CHECK: # %bb.0:
303+
; CHECK-NEXT: addi a0, a0, 2
304+
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
305+
; CHECK-NEXT: vlse16.v v8, (a0), zero
306+
; CHECK-NEXT: ret
307+
%v = load <4 x half>, ptr %p
308+
%s = shufflevector <4 x half> %v, <4 x half> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
309+
ret <4 x half> %s
310+
}
311+
265312
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
266313
; RV32: {{.*}}
267314
; RV64: {{.*}}

0 commit comments

Comments
 (0)