[RISCV] Lower constant build_vectors with few non-sign bits via vsext #65648

Merged
21 changes: 21 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3456,6 +3456,27 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
return Res;

// If the number of signbits allows, see if we can lower as a <N x i8>.
// We restrict this to N <= 4 to ensure the resulting narrow vector is
// 32 bits or smaller and can thus be materialized cheaply from a scalar.
// The main motivation for this is the constant index vector required
// by vrgather.vv. This covers all index vectors up to size 4.
// TODO: We really should be costing the smaller vector. There are
// profitable cases this misses.
const unsigned ScalarSize =
Op.getSimpleValueType().getScalarSizeInBits();
if (ScalarSize > 8 && NumElts <= 4) {
unsigned SignBits = DAG.ComputeNumSignBits(Op);
if (ScalarSize - SignBits < 8) {
SDValue Source =
DAG.getNode(ISD::TRUNCATE, DL, VT.changeVectorElementType(MVT::i8), Op);
Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
Source, DAG, Subtarget);
SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
return convertFromScalableVector(VT, Res, DAG, Subtarget);
}
}

// For constant vectors, use generic constant pool lowering. Otherwise,
// we'd have to materialize constants in GPRs just to move them into the
// vector.
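As a worked illustration of the new lowering (hand-derived from the check lines in vrgather_permute_shuffle_vu_v4i16 in fixed-vectors-int-shuffles.ll below, not text taken from the patch itself): the shuffle index vector <4 x i16> <1, 2, 0, 1> has at most one non-sign bit per element, so all four elements fit in sign-extended i8 and pack, element 0 in the least significant byte, into the 32-bit scalar 0x01000201. That scalar is built with lui/addi and widened per lane with vsext, replacing the former constant-pool load of the index vector:

; index vector for: shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
lui a0, 4096                          ; a0 = 4096 << 12 = 0x01000000
addi a0, a0, 513                      ; a0 = 0x01000000 + 0x201 = 0x01000201
vsetivli zero, 4, e32, m1, ta, ma     ; SEW=32 to insert the packed scalar
vmv.s.x v9, a0                        ; bytes 01 02 00 01 land in element 0
vsetvli zero, zero, e16, mf2, ta, ma  ; switch to SEW=16 for the widened index
vsext.vf2 v10, v9                     ; sign-extend each i8 lane to i16 -> <1, 2, 0, 1>
vrgather.vv v9, v8, v10               ; gather with the in-register index vector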
26 changes: 14 additions & 12 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -806,18 +806,19 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
; RV32NOM: # %bb.0:
; RV32NOM-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32NOM-NEXT: vmv.v.i v9, 0
; RV32NOM-NEXT: li a0, -1
; RV32NOM-NEXT: vslide1down.vx v9, v9, a0
; RV32NOM-NEXT: lui a0, %hi(.LCPI42_0)
; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI42_0)
; RV32NOM-NEXT: vle32.v v10, (a0)
; RV32NOM-NEXT: lui a0, %hi(.LCPI42_1)
; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI42_1)
; RV32NOM-NEXT: vle32.v v11, (a0)
; RV32NOM-NEXT: li a0, -1
; RV32NOM-NEXT: vslide1down.vx v9, v9, a0
; RV32NOM-NEXT: vand.vv v9, v8, v9
; RV32NOM-NEXT: vmulh.vv v8, v8, v10
; RV32NOM-NEXT: vadd.vv v8, v8, v9
; RV32NOM-NEXT: vsra.vv v9, v8, v11
; RV32NOM-NEXT: lui a0, 12320
; RV32NOM-NEXT: addi a0, a0, 257
; RV32NOM-NEXT: vmv.s.x v9, a0
; RV32NOM-NEXT: vsext.vf4 v10, v9
; RV32NOM-NEXT: vsra.vv v9, v8, v10
; RV32NOM-NEXT: vsrl.vi v8, v8, 31
; RV32NOM-NEXT: vadd.vv v8, v9, v8
; RV32NOM-NEXT: vslidedown.vi v8, v8, 2
@@ -841,18 +842,19 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
; RV64NOM: # %bb.0:
; RV64NOM-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64NOM-NEXT: vmv.v.i v9, 0
; RV64NOM-NEXT: li a0, -1
; RV64NOM-NEXT: vslide1down.vx v9, v9, a0
; RV64NOM-NEXT: lui a0, %hi(.LCPI42_0)
; RV64NOM-NEXT: addi a0, a0, %lo(.LCPI42_0)
; RV64NOM-NEXT: vle32.v v10, (a0)
; RV64NOM-NEXT: lui a0, %hi(.LCPI42_1)
; RV64NOM-NEXT: addi a0, a0, %lo(.LCPI42_1)
; RV64NOM-NEXT: vle32.v v11, (a0)
; RV64NOM-NEXT: li a0, -1
; RV64NOM-NEXT: vslide1down.vx v9, v9, a0
; RV64NOM-NEXT: vand.vv v9, v8, v9
; RV64NOM-NEXT: vmulh.vv v8, v8, v10
; RV64NOM-NEXT: vadd.vv v8, v8, v9
; RV64NOM-NEXT: vsra.vv v8, v8, v11
; RV64NOM-NEXT: lui a0, 12320
; RV64NOM-NEXT: addiw a0, a0, 257
; RV64NOM-NEXT: vmv.s.x v9, a0
; RV64NOM-NEXT: vsext.vf4 v10, v9
; RV64NOM-NEXT: vsra.vv v8, v8, v10
; RV64NOM-NEXT: vsrl.vi v9, v8, 31
; RV64NOM-NEXT: vadd.vv v8, v8, v9
; RV64NOM-NEXT: vslidedown.vi v8, v8, 2
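The extract test above shows the same idea applied to an arithmetic constant rather than a shuffle index: the vector of shift amounts for the sdiv lowering is now built from one packed scalar and widened with vsext.vf4, so only the multiplier vector for vmulh still comes from the constant pool. Decoding the immediate by hand (not text from the patch):

lui a0, 12320       ; a0 = 12320 << 12 = 0x03020000
addi a0, a0, 257    ; a0 = 0x03020000 + 0x101 = 0x03020101
vmv.s.x v9, a0      ; bytes 01 01 02 03, element 0 first
vsext.vf4 v10, v9   ; sign-extend each i8 lane to i32 -> <1, 1, 2, 3>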
36 changes: 22 additions & 14 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -435,40 +435,48 @@ define <4 x float> @unary_interleave_v4f32(<4 x float> %x) {
define <4 x double> @unary_interleave_v4f64(<4 x double> %x) {
; RV32-V128-LABEL: unary_interleave_v4f64:
; RV32-V128: # %bb.0:
; RV32-V128-NEXT: lui a0, %hi(.LCPI13_0)
; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI13_0)
; RV32-V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-V128-NEXT: vle16.v v12, (a0)
; RV32-V128-NEXT: lui a0, 12304
; RV32-V128-NEXT: addi a0, a0, 512
; RV32-V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-V128-NEXT: vmv.s.x v10, a0
; RV32-V128-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32-V128-NEXT: vsext.vf2 v12, v10
; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v12
; RV32-V128-NEXT: vmv.v.v v8, v10
; RV32-V128-NEXT: ret
;
; RV64-V128-LABEL: unary_interleave_v4f64:
; RV64-V128: # %bb.0:
; RV64-V128-NEXT: lui a0, %hi(.LCPI13_0)
; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI13_0)
; RV64-V128-NEXT: lui a0, 12304
; RV64-V128-NEXT: addiw a0, a0, 512
; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-V128-NEXT: vle64.v v12, (a0)
; RV64-V128-NEXT: vmv.s.x v10, a0
; RV64-V128-NEXT: vsext.vf8 v12, v10
; RV64-V128-NEXT: vrgather.vv v10, v8, v12
; RV64-V128-NEXT: vmv.v.v v8, v10
; RV64-V128-NEXT: ret
;
; RV32-V512-LABEL: unary_interleave_v4f64:
; RV32-V512: # %bb.0:
; RV32-V512-NEXT: lui a0, %hi(.LCPI13_0)
; RV32-V512-NEXT: addi a0, a0, %lo(.LCPI13_0)
; RV32-V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; RV32-V512-NEXT: vle16.v v10, (a0)
; RV32-V512-NEXT: lui a0, 12304
; RV32-V512-NEXT: addi a0, a0, 512
; RV32-V512-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
; RV32-V512-NEXT: vmv.s.x v9, a0
; RV32-V512-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV32-V512-NEXT: vsext.vf2 v10, v9
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32-V512-NEXT: vrgatherei16.vv v9, v8, v10
; RV32-V512-NEXT: vmv.v.v v8, v9
; RV32-V512-NEXT: ret
;
; RV64-V512-LABEL: unary_interleave_v4f64:
; RV64-V512: # %bb.0:
; RV64-V512-NEXT: lui a0, %hi(.LCPI13_0)
; RV64-V512-NEXT: addi a0, a0, %lo(.LCPI13_0)
; RV64-V512-NEXT: lui a0, 12304
; RV64-V512-NEXT: addiw a0, a0, 512
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; RV64-V512-NEXT: vle64.v v10, (a0)
; RV64-V512-NEXT: vmv.s.x v9, a0
; RV64-V512-NEXT: vsext.vf8 v10, v9
; RV64-V512-NEXT: vrgather.vv v9, v8, v10
; RV64-V512-NEXT: vmv.v.v v8, v9
; RV64-V512-NEXT: ret
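The interleave tests use the same packing for a wider result type (a hand-decoded note, not taken from the patch): the packed scalar decodes to the interleave index <0, 2, 1, 3>. On RV32 the i8 constant is widened with vsext.vf2 to an i16 index for vrgatherei16.vv, while on RV64 vsext.vf8 widens it directly to i64 for vrgather.vv; in both cases the vle16/vle64 load of the index vector from a .LCPI slot goes away.

lui a0, 12304       ; a0 = 12304 << 12 = 0x03010000
addi a0, a0, 512    ; a0 = 0x03010200 -> bytes 00 02 01 03 -> index <0, 2, 1, 3>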
36 changes: 22 additions & 14 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -57,20 +57,24 @@ define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) {
define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) {
; RV32-LABEL: vrgather_permute_shuffle_vu_v4f64:
; RV32: # %bb.0:
; RV32-NEXT: lui a0, %hi(.LCPI4_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI4_0)
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vle16.v v12, (a0)
; RV32-NEXT: lui a0, 4096
; RV32-NEXT: addi a0, a0, 513
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.s.x v10, a0
; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT: vsext.vf2 v12, v10
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT: vrgatherei16.vv v10, v8, v12
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vrgather_permute_shuffle_vu_v4f64:
; RV64: # %bb.0:
; RV64-NEXT: lui a0, %hi(.LCPI4_0)
; RV64-NEXT: addi a0, a0, %lo(.LCPI4_0)
; RV64-NEXT: lui a0, 4096
; RV64-NEXT: addiw a0, a0, 513
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vle64.v v12, (a0)
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsext.vf8 v12, v10
; RV64-NEXT: vrgather.vv v10, v8, v12
; RV64-NEXT: vmv.v.v v8, v10
; RV64-NEXT: ret
@@ -81,20 +85,24 @@ define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) {
define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) {
; RV32-LABEL: vrgather_permute_shuffle_uv_v4f64:
; RV32: # %bb.0:
; RV32-NEXT: lui a0, %hi(.LCPI5_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI5_0)
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vle16.v v12, (a0)
; RV32-NEXT: lui a0, 4096
; RV32-NEXT: addi a0, a0, 513
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.s.x v10, a0
; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT: vsext.vf2 v12, v10
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT: vrgatherei16.vv v10, v8, v12
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vrgather_permute_shuffle_uv_v4f64:
; RV64: # %bb.0:
; RV64-NEXT: lui a0, %hi(.LCPI5_0)
; RV64-NEXT: addi a0, a0, %lo(.LCPI5_0)
; RV64-NEXT: lui a0, 4096
; RV64-NEXT: addiw a0, a0, 513
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vle64.v v12, (a0)
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsext.vf8 v12, v10
; RV64-NEXT: vrgather.vv v10, v8, v12
; RV64-NEXT: vmv.v.v v8, v10
; RV64-NEXT: ret
36 changes: 22 additions & 14 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -668,40 +668,48 @@ define <4 x i32> @unary_interleave_v4i32(<4 x i32> %x) {
define <4 x i64> @unary_interleave_v4i64(<4 x i64> %x) {
; RV32-V128-LABEL: unary_interleave_v4i64:
; RV32-V128: # %bb.0:
; RV32-V128-NEXT: lui a0, %hi(.LCPI22_0)
; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI22_0)
; RV32-V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-V128-NEXT: vle16.v v12, (a0)
; RV32-V128-NEXT: lui a0, 12304
; RV32-V128-NEXT: addi a0, a0, 512
; RV32-V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-V128-NEXT: vmv.s.x v10, a0
; RV32-V128-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32-V128-NEXT: vsext.vf2 v12, v10
; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v12
; RV32-V128-NEXT: vmv.v.v v8, v10
; RV32-V128-NEXT: ret
;
; RV64-V128-LABEL: unary_interleave_v4i64:
; RV64-V128: # %bb.0:
; RV64-V128-NEXT: lui a0, %hi(.LCPI22_0)
; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI22_0)
; RV64-V128-NEXT: lui a0, 12304
; RV64-V128-NEXT: addiw a0, a0, 512
; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-V128-NEXT: vle64.v v12, (a0)
; RV64-V128-NEXT: vmv.s.x v10, a0
; RV64-V128-NEXT: vsext.vf8 v12, v10
; RV64-V128-NEXT: vrgather.vv v10, v8, v12
; RV64-V128-NEXT: vmv.v.v v8, v10
; RV64-V128-NEXT: ret
;
; RV32-V512-LABEL: unary_interleave_v4i64:
; RV32-V512: # %bb.0:
; RV32-V512-NEXT: lui a0, %hi(.LCPI22_0)
; RV32-V512-NEXT: addi a0, a0, %lo(.LCPI22_0)
; RV32-V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; RV32-V512-NEXT: vle16.v v10, (a0)
; RV32-V512-NEXT: lui a0, 12304
; RV32-V512-NEXT: addi a0, a0, 512
; RV32-V512-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
; RV32-V512-NEXT: vmv.s.x v9, a0
; RV32-V512-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV32-V512-NEXT: vsext.vf2 v10, v9
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32-V512-NEXT: vrgatherei16.vv v9, v8, v10
; RV32-V512-NEXT: vmv.v.v v8, v9
; RV32-V512-NEXT: ret
;
; RV64-V512-LABEL: unary_interleave_v4i64:
; RV64-V512: # %bb.0:
; RV64-V512-NEXT: lui a0, %hi(.LCPI22_0)
; RV64-V512-NEXT: addi a0, a0, %lo(.LCPI22_0)
; RV64-V512-NEXT: lui a0, 12304
; RV64-V512-NEXT: addiw a0, a0, 512
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; RV64-V512-NEXT: vle64.v v10, (a0)
; RV64-V512-NEXT: vmv.s.x v9, a0
; RV64-V512-NEXT: vsext.vf8 v10, v9
; RV64-V512-NEXT: vrgather.vv v9, v8, v10
; RV64-V512-NEXT: vmv.v.v v8, v9
; RV64-V512-NEXT: ret
64 changes: 46 additions & 18 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -51,29 +51,57 @@ define <4 x i16> @shuffle_vx_v4i16(<4 x i16> %x) {
}

define <4 x i16> @vrgather_permute_shuffle_vu_v4i16(<4 x i16> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_vu_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI4_0)
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: vrgather.vv v9, v8, v10
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
; RV32-LABEL: vrgather_permute_shuffle_vu_v4i16:
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 4096
; RV32-NEXT: addi a0, a0, 513
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT: vsext.vf2 v10, v9
; RV32-NEXT: vrgather.vv v9, v8, v10
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vrgather_permute_shuffle_vu_v4i16:
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 4096
; RV64-NEXT: addiw a0, a0, 513
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT: vsext.vf2 v10, v9
; RV64-NEXT: vrgather.vv v9, v8, v10
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
ret <4 x i16> %s
}

define <4 x i16> @vrgather_permute_shuffle_uv_v4i16(<4 x i16> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_uv_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0)
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: vrgather.vv v9, v8, v10
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
; RV32-LABEL: vrgather_permute_shuffle_uv_v4i16:
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 4096
; RV32-NEXT: addi a0, a0, 513
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT: vsext.vf2 v10, v9
; RV32-NEXT: vrgather.vv v9, v8, v10
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vrgather_permute_shuffle_uv_v4i16:
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 4096
; RV64-NEXT: addiw a0, a0, 513
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT: vsext.vf2 v10, v9
; RV64-NEXT: vrgather.vv v9, v8, v10
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%s = shufflevector <4 x i16> poison, <4 x i16> %x, <4 x i32> <i32 5, i32 6, i32 4, i32 5>
ret <4 x i16> %s
}