Skip to content

Commit 323d3ab

Browse files
authored
[RISCV] Optimize undef Even vector in getWideningInterleave. (#88221)
We recently optimized the case where the odd vector is undef in order to fix a poison bug. There are additional optimizations we can do when the even vector is undef: with Zvbb, we can use a single vwsll; without Zvbb, we can use a vzext.vf2 followed by a vsll.
1 parent 51786eb commit 323d3ab

File tree

2 files changed

+31
-2
lines changed

2 files changed

+31
-2
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4638,8 +4638,17 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
46384638
Subtarget.getXLenVT()));
46394639
Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
46404640
OffsetVec, Passthru, Mask, VL);
4641-
Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4642-
Interleaved, EvenV, Passthru, Mask, VL);
4641+
if (!EvenV.isUndef())
4642+
Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4643+
Interleaved, EvenV, Passthru, Mask, VL);
4644+
} else if (EvenV.isUndef()) {
4645+
Interleaved =
4646+
DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
4647+
4648+
SDValue OffsetVec =
4649+
DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
4650+
Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
4651+
Interleaved, OffsetVec, Passthru, Mask, VL);
46434652
} else {
46444653
// FIXME: We should freeze the odd vector here. We already handled the case
46454654
// of provably undef/poison above.

llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -674,6 +674,26 @@ define <vscale x 8 x i32> @vector_interleave_nxv8i32_nxv4i32_poison(<vscale x 4
674674
ret <vscale x 8 x i32> %res
675675
}
676676

677+
define <vscale x 8 x i32> @vector_interleave_nxv8i32_nxv4i32_poison2(<vscale x 4 x i32> %a) {
678+
; CHECK-LABEL: vector_interleave_nxv8i32_nxv4i32_poison2:
679+
; CHECK: # %bb.0:
680+
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
681+
; CHECK-NEXT: vzext.vf2 v12, v8
682+
; CHECK-NEXT: li a0, 32
683+
; CHECK-NEXT: vsll.vx v8, v12, a0
684+
; CHECK-NEXT: ret
685+
;
686+
; ZVBB-LABEL: vector_interleave_nxv8i32_nxv4i32_poison2:
687+
; ZVBB: # %bb.0:
688+
; ZVBB-NEXT: li a0, 32
689+
; ZVBB-NEXT: vsetvli a1, zero, e32, m2, ta, ma
690+
; ZVBB-NEXT: vwsll.vx v12, v8, a0
691+
; ZVBB-NEXT: vmv4r.v v8, v12
692+
; ZVBB-NEXT: ret
693+
%res = call <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a)
694+
ret <vscale x 8 x i32> %res
695+
}
696+
677697
declare <vscale x 64 x half> @llvm.experimental.vector.interleave2.nxv64f16(<vscale x 32 x half>, <vscale x 32 x half>)
678698
declare <vscale x 32 x float> @llvm.experimental.vector.interleave2.nxv32f32(<vscale x 16 x float>, <vscale x 16 x float>)
679699
declare <vscale x 16 x double> @llvm.experimental.vector.interleave2.nxv16f64(<vscale x 8 x double>, <vscale x 8 x double>)

0 commit comments

Comments
 (0)