[RISCV] Use X0 for VLMax for slide1up/slide1down in lowerVectorIntrinsicScalars. #101384
Conversation
…sicScalars. Previously, we created a vsetvlimax intrinsic. Using X0 simplifies the code and enables some optimizations to kick in when the exact value of VLMAX is known.
@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

Changes

Previously, we created a vsetvlimax intrinsic. Using X0 simplifies the code and enables some optimizations to kick in when the exact value of VLMAX is known.

Full diff: https://github.com/llvm/llvm-project/pull/101384.diff

3 Files Affected:
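For context, a minimal sketch of the kind of RV32 input that reaches the changed path, reconstructed from the truncated tests below (the function name is hypothetical, and the operand order passthru, vector, scalar, vl is assumed from the unmasked riscv_vslide1down intrinsic form; the real tests may use different constants):

define <vscale x 1 x i64> @example_vslide1down_i64_vl2048(<vscale x 1 x i64> %vec, i64 %scalar) nounwind {
entry:
  ; i64 scalar on RV32: the backend splits this into two e32 vslide1down operations.
  ; The constant AVL (2048 here) is large enough that AVLInt >= 2 * MaxVLMAX,
  ; so the lowering can simply request VLMAX for the i32 VL.
  %a = call <vscale x 1 x i64> @llvm.riscv.vslide1down.nxv1i64.i64(
      <vscale x 1 x i64> undef,
      <vscale x 1 x i64> %vec,
      i64 %scalar,
      i32 2048)
  ret <vscale x 1 x i64> %a
}

With the VL tied to X0, the requested VL is VLMAX. Where a RUN line pins VLEN to a single value (apparently 64 and 512 for the CHECK-64 and CHECK-512 prefixes), VLMAX = LMUL * VLEN / SEW is a known constant (64 / 32 = 2, 512 / 32 = 16), which is why those checks fold from vsetvli a2, zero to vsetivli zero, 2 and vsetivli zero, 16 in the test diffs below.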
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 96edd331eb678..68b614d1d3fdc 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8857,14 +8857,7 @@ static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
} else if (AVLInt >= 2 * MaxVLMAX) {
// Just set vl to VLMAX in this situation
- RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
- SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
- unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
- SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
- SDValue SETVLMAX = DAG.getTargetConstant(
- Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
- I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
- LMUL);
+ I32VL = DAG.getRegister(RISCV::X0, XLenVT);
} else {
// For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
// is related to the hardware implementation.
diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1down-constant-vl-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1down-constant-vl-rv32.ll
index 4115e6a91f965..fd90e67b1fb2c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vslide1down-constant-vl-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vslide1down-constant-vl-rv32.ll
@@ -51,7 +51,7 @@ define <vscale x 1 x i64> @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2(<vsc
;
; CHECK-64-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-64-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-64-NEXT: vslide1down.vx v8, v8, a0
; CHECK-64-NEXT: vslide1down.vx v8, v8, a1
; CHECK-64-NEXT: ret
@@ -84,7 +84,7 @@ define <vscale x 1 x i64> @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl3(<vsc
;
; CHECK-64-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl3:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-64-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-64-NEXT: vslide1down.vx v8, v8, a0
; CHECK-64-NEXT: vslide1down.vx v8, v8, a1
; CHECK-64-NEXT: ret
@@ -117,7 +117,7 @@ define <vscale x 1 x i64> @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl8(<vsc
;
; CHECK-64-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl8:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-64-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-64-NEXT: vslide1down.vx v8, v8, a0
; CHECK-64-NEXT: vslide1down.vx v8, v8, a1
; CHECK-64-NEXT: ret
@@ -152,7 +152,7 @@ define <vscale x 1 x i64> @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl9(<vsc
;
; CHECK-64-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl9:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-64-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-64-NEXT: vslide1down.vx v8, v8, a0
; CHECK-64-NEXT: vslide1down.vx v8, v8, a1
; CHECK-64-NEXT: ret
@@ -187,7 +187,7 @@ define <vscale x 1 x i64> @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl15(<vs
;
; CHECK-64-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl15:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-64-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-64-NEXT: vslide1down.vx v8, v8, a0
; CHECK-64-NEXT: vslide1down.vx v8, v8, a1
; CHECK-64-NEXT: ret
@@ -213,14 +213,14 @@ define <vscale x 1 x i64> @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl16(<vs
;
; CHECK-512-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl16:
; CHECK-512: # %bb.0: # %entry
-; CHECK-512-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-512-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; CHECK-512-NEXT: vslide1down.vx v8, v8, a0
; CHECK-512-NEXT: vslide1down.vx v8, v8, a1
; CHECK-512-NEXT: ret
;
; CHECK-64-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl16:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-64-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-64-NEXT: vslide1down.vx v8, v8, a0
; CHECK-64-NEXT: vslide1down.vx v8, v8, a1
; CHECK-64-NEXT: ret
@@ -247,14 +247,14 @@ define <vscale x 1 x i64> @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2047(<
;
; CHECK-512-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2047:
; CHECK-512: # %bb.0: # %entry
-; CHECK-512-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-512-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; CHECK-512-NEXT: vslide1down.vx v8, v8, a0
; CHECK-512-NEXT: vslide1down.vx v8, v8, a1
; CHECK-512-NEXT: ret
;
; CHECK-64-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2047:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-64-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-64-NEXT: vslide1down.vx v8, v8, a0
; CHECK-64-NEXT: vslide1down.vx v8, v8, a1
; CHECK-64-NEXT: ret
@@ -269,12 +269,26 @@ entry:
}
define <vscale x 1 x i64> @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2048(<vscale x 1 x i64> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2048:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
-; CHECK-NEXT: vslide1down.vx v8, v8, a1
-; CHECK-NEXT: ret
+; CHECK-128-65536-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2048:
+; CHECK-128-65536: # %bb.0: # %entry
+; CHECK-128-65536-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-128-65536-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-128-65536-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-128-65536-NEXT: ret
+;
+; CHECK-512-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2048:
+; CHECK-512: # %bb.0: # %entry
+; CHECK-512-NEXT: vsetivli zero, 16, e32, m1, ta, ma
+; CHECK-512-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-512-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-512-NEXT: ret
+;
+; CHECK-64-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2048:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; CHECK-64-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-64-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-64-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vslide1down.nxv1i64.i64(
<vscale x 1 x i64> undef,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1up-constant-vl-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1up-constant-vl-rv32.ll
index f0d621bef2b91..b26f1cab97c77 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vslide1up-constant-vl-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vslide1up-constant-vl-rv32.ll
@@ -51,7 +51,7 @@ define <vscale x 1 x i64> @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2(<vscal
;
; CHECK-64-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-64-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-64-NEXT: vslide1up.vx v9, v8, a1
; CHECK-64-NEXT: vslide1up.vx v8, v9, a0
; CHECK-64-NEXT: ret
@@ -84,7 +84,7 @@ define <vscale x 1 x i64> @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl3(<vscal
;
; CHECK-64-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl3:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-64-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-64-NEXT: vslide1up.vx v9, v8, a1
; CHECK-64-NEXT: vslide1up.vx v8, v9, a0
; CHECK-64-NEXT: ret
@@ -117,7 +117,7 @@ define <vscale x 1 x i64> @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl8(<vscal
;
; CHECK-64-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl8:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-64-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-64-NEXT: vslide1up.vx v9, v8, a1
; CHECK-64-NEXT: vslide1up.vx v8, v9, a0
; CHECK-64-NEXT: ret
@@ -152,7 +152,7 @@ define <vscale x 1 x i64> @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl9(<vscal
;
; CHECK-64-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl9:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-64-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-64-NEXT: vslide1up.vx v9, v8, a1
; CHECK-64-NEXT: vslide1up.vx v8, v9, a0
; CHECK-64-NEXT: ret
@@ -187,7 +187,7 @@ define <vscale x 1 x i64> @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl15(<vsca
;
; CHECK-64-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl15:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-64-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-64-NEXT: vslide1up.vx v9, v8, a1
; CHECK-64-NEXT: vslide1up.vx v8, v9, a0
; CHECK-64-NEXT: ret
@@ -213,14 +213,14 @@ define <vscale x 1 x i64> @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl16(<vsca
;
; CHECK-512-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl16:
; CHECK-512: # %bb.0: # %entry
-; CHECK-512-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-512-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; CHECK-512-NEXT: vslide1up.vx v9, v8, a1
; CHECK-512-NEXT: vslide1up.vx v8, v9, a0
; CHECK-512-NEXT: ret
;
; CHECK-64-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl16:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-64-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-64-NEXT: vslide1up.vx v9, v8, a1
; CHECK-64-NEXT: vslide1up.vx v8, v9, a0
; CHECK-64-NEXT: ret
@@ -247,14 +247,14 @@ define <vscale x 1 x i64> @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2047(<vs
;
; CHECK-512-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2047:
; CHECK-512: # %bb.0: # %entry
-; CHECK-512-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-512-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; CHECK-512-NEXT: vslide1up.vx v9, v8, a1
; CHECK-512-NEXT: vslide1up.vx v8, v9, a0
; CHECK-512-NEXT: ret
;
; CHECK-64-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2047:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-64-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-64-NEXT: vslide1up.vx v9, v8, a1
; CHECK-64-NEXT: vslide1up.vx v8, v9, a0
; CHECK-64-NEXT: ret
@@ -269,12 +269,26 @@ entry:
}
define <vscale x 1 x i64> @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2048(<vscale x 1 x i64> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2048:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; CHECK-NEXT: vslide1up.vx v9, v8, a1
-; CHECK-NEXT: vslide1up.vx v8, v9, a0
-; CHECK-NEXT: ret
+; CHECK-128-65536-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2048:
+; CHECK-128-65536: # %bb.0: # %entry
+; CHECK-128-65536-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-128-65536-NEXT: vslide1up.vx v9, v8, a1
+; CHECK-128-65536-NEXT: vslide1up.vx v8, v9, a0
+; CHECK-128-65536-NEXT: ret
+;
+; CHECK-512-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2048:
+; CHECK-512: # %bb.0: # %entry
+; CHECK-512-NEXT: vsetivli zero, 16, e32, m1, ta, ma
+; CHECK-512-NEXT: vslide1up.vx v9, v8, a1
+; CHECK-512-NEXT: vslide1up.vx v8, v9, a0
+; CHECK-512-NEXT: ret
+;
+; CHECK-64-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2048:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; CHECK-64-NEXT: vslide1up.vx v9, v8, a1
+; CHECK-64-NEXT: vslide1up.vx v8, v9, a0
+; CHECK-64-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vslide1up.nxv1i64.i64(
<vscale x 1 x i64> undef,
LGTM