-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[LoongArch] Make ISD::FSQRT a legal operation with lsx/lasx feature #74795
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
And add some patterns: 1. (fdiv 1.0, vector) 2. (fdiv 1.0, (fsqrt vector))
@llvm/pr-subscribers-backend-loongarch Author: wanglei (wangleiat) ChangesAnd add some patterns:
Full diff: https://github.com/llvm/llvm-project/pull/74795.diff 7 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index e9e29c2dc8929d..e4b1d2d30516a7 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -269,6 +269,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
setOperationAction(ISD::FMA, VT, Legal);
+ setOperationAction(ISD::FSQRT, VT, Legal);
setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
ISD::SETUGE, ISD::SETUGT},
VT, Expand);
@@ -309,6 +310,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
setOperationAction(ISD::FMA, VT, Legal);
+ setOperationAction(ISD::FSQRT, VT, Legal);
setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
ISD::SETUGE, ISD::SETUGT},
VT, Expand);
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index a9bf65c6840d5c..55b90f4450c0d3 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1092,6 +1092,13 @@ multiclass PatXr<SDPatternOperator OpNode, string Inst> {
(!cast<LAInst>(Inst#"_D") LASX256:$xj)>;
}
+multiclass PatXrF<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(v8f32 (OpNode (v8f32 LASX256:$xj))),
+ (!cast<LAInst>(Inst#"_S") LASX256:$xj)>;
+ def : Pat<(v4f64 (OpNode (v4f64 LASX256:$xj))),
+ (!cast<LAInst>(Inst#"_D") LASX256:$xj)>;
+}
+
multiclass PatXrXr<SDPatternOperator OpNode, string Inst> {
def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)),
(!cast<LAInst>(Inst#"_B") LASX256:$xj, LASX256:$xk)>;
@@ -1448,6 +1455,21 @@ def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa),
def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa),
(XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+// XVFSQRT_{S/D}
+defm : PatXrF<fsqrt, "XVFSQRT">;
+
+// XVRECIP_{S/D}
+def : Pat<(fdiv vsplatf32_fpimm_eq_1, v8f32:$xj),
+ (XVFRECIP_S v8f32:$xj)>;
+def : Pat<(fdiv vsplatf64_fpimm_eq_1, v4f64:$xj),
+ (XVFRECIP_D v4f64:$xj)>;
+
+// XVFRSQRT_{S/D}
+def : Pat<(fdiv vsplatf32_fpimm_eq_1, (fsqrt v8f32:$xj)),
+ (XVFRSQRT_S v8f32:$xj)>;
+def : Pat<(fdiv vsplatf64_fpimm_eq_1, (fsqrt v4f64:$xj)),
+ (XVFRSQRT_D v4f64:$xj)>;
+
// XVSEQ[I]_{B/H/W/D}
defm : PatCCXrSimm5<SETEQ, "XVSEQI">;
defm : PatCCXrXr<SETEQ, "XVSEQ">;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index ff21c6681271e7..8ad0c5904f2583 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -95,6 +95,29 @@ def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector),
Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63;
}]>;
+def vsplatf32_fpimm_eq_1
+ : PatFrags<(ops), [(bitconvert (v4i32 (build_vector))),
+ (bitconvert (v8i32 (build_vector)))], [{
+ APInt Imm;
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+ N = N->getOperand(0).getNode();
+
+ return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+ Imm.getBitWidth() == EltTy.getSizeInBits() &&
+ Imm == APFloat(+1.0f).bitcastToAPInt();
+}]>;
+def vsplatf64_fpimm_eq_1
+ : PatFrags<(ops), [(bitconvert (v2i64 (build_vector))),
+ (bitconvert (v4i64 (build_vector)))], [{
+ APInt Imm;
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+ N = N->getOperand(0).getNode();
+
+ return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+ Imm.getBitWidth() == EltTy.getSizeInBits() &&
+ Imm == APFloat(+1.0).bitcastToAPInt();
+}]>;
+
def vsplati8imm7 : PatFrag<(ops node:$reg),
(and node:$reg, vsplati8_imm_eq_7)>;
def vsplati16imm15 : PatFrag<(ops node:$reg),
@@ -1173,6 +1196,13 @@ multiclass PatVr<SDPatternOperator OpNode, string Inst> {
(!cast<LAInst>(Inst#"_D") LSX128:$vj)>;
}
+multiclass PatVrF<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(v4f32 (OpNode (v4f32 LSX128:$vj))),
+ (!cast<LAInst>(Inst#"_S") LSX128:$vj)>;
+ def : Pat<(v2f64 (OpNode (v2f64 LSX128:$vj))),
+ (!cast<LAInst>(Inst#"_D") LSX128:$vj)>;
+}
+
multiclass PatVrVr<SDPatternOperator OpNode, string Inst> {
def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
(!cast<LAInst>(Inst#"_B") LSX128:$vj, LSX128:$vk)>;
@@ -1525,6 +1555,21 @@ def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va),
def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va),
(VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>;
+// VFSQRT_{S/D}
+defm : PatVrF<fsqrt, "VFSQRT">;
+
+// VFRECIP_{S/D}
+def : Pat<(fdiv vsplatf32_fpimm_eq_1, v4f32:$vj),
+ (VFRECIP_S v4f32:$vj)>;
+def : Pat<(fdiv vsplatf64_fpimm_eq_1, v2f64:$vj),
+ (VFRECIP_D v2f64:$vj)>;
+
+// VFRSQRT_{S/D}
+def : Pat<(fdiv vsplatf32_fpimm_eq_1, (fsqrt v4f32:$vj)),
+ (VFRSQRT_S v4f32:$vj)>;
+def : Pat<(fdiv vsplatf64_fpimm_eq_1, (fsqrt v2f64:$vj)),
+ (VFRSQRT_D v2f64:$vj)>;
+
// VSEQ[I]_{B/H/W/D}
defm : PatCCVrSimm5<SETEQ, "VSEQI">;
defm : PatCCVrVr<SETEQ, "VSEQ">;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll
new file mode 100644
index 00000000000000..c4a881bdeae9f1
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+;; fsqrt
+define void @sqrt_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sqrt_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfsqrt.s $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %a0, align 16
+ %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %v0)
+ store <8 x float> %sqrt, ptr %res, align 16
+ ret void
+}
+
+define void @sqrt_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sqrt_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfsqrt.d $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %a0, align 16
+ %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %v0)
+ store <4 x double> %sqrt, ptr %res, align 16
+ ret void
+}
+
+;; 1.0 / (fsqrt vec)
+define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: one_div_sqrt_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfrsqrt.s $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %a0, align 16
+ %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %v0)
+ %div = fdiv <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
+ store <8 x float> %div, ptr %res, align 16
+ ret void
+}
+
+define void @one_div_sqrt_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: one_div_sqrt_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfrsqrt.d $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %a0, align 16
+ %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %v0)
+ %div = fdiv <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %sqrt
+ store <4 x double> %div, ptr %res, align 16
+ ret void
+}
+
+declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
+declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll
index 284121a79a492d..6004565b0b784e 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll
@@ -32,3 +32,32 @@ entry:
store <4 x double> %v2, ptr %res
ret void
}
+
+;; 1.0 / vec
+define void @one_fdiv_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: one_fdiv_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfrecip.s $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %a0
+ %div = fdiv <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %v0
+ store <8 x float> %div, ptr %res
+ ret void
+}
+
+define void @one_fdiv_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: one_fdiv_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfrecip.d $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %a0
+ %div = fdiv <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %v0
+ store <4 x double> %div, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll
new file mode 100644
index 00000000000000..a57bc1ca0e9488
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+;; fsqrt
+define void @sqrt_v4f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sqrt_v4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vfsqrt.s $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x float>, ptr %a0, align 16
+ %sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %v0)
+ store <4 x float> %sqrt, ptr %res, align 16
+ ret void
+}
+
+define void @sqrt_v2f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sqrt_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vfsqrt.d $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x double>, ptr %a0, align 16
+ %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %v0)
+ store <2 x double> %sqrt, ptr %res, align 16
+ ret void
+}
+
+;; 1.0 / (fsqrt vec)
+define void @one_div_sqrt_v4f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: one_div_sqrt_v4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vfrsqrt.s $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x float>, ptr %a0, align 16
+ %sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %v0)
+ %div = fdiv <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
+ store <4 x float> %div, ptr %res, align 16
+ ret void
+}
+
+define void @one_div_sqrt_v2f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: one_div_sqrt_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vfrsqrt.d $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x double>, ptr %a0, align 16
+ %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %v0)
+ %div = fdiv <2 x double> <double 1.0, double 1.0>, %sqrt
+ store <2 x double> %div, ptr %res, align 16
+ ret void
+}
+
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll
index eb7c8bd9616ec7..5f1ee9e4d212eb 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll
@@ -32,3 +32,32 @@ entry:
store <2 x double> %v2, ptr %res
ret void
}
+
+;; 1.0 / vec
+define void @one_fdiv_v4f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: one_fdiv_v4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vfrecip.s $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x float>, ptr %a0
+ %div = fdiv <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %v0
+ store <4 x float> %div, ptr %res
+ ret void
+}
+
+define void @one_fdiv_v2f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: one_fdiv_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vfrecip.d $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x double>, ptr %a0
+ %div = fdiv <2 x double> <double 1.0, double 1.0>, %v0
+ store <2 x double> %div, ptr %res
+ ret void
+}
|
SixWeining
approved these changes
Dec 8, 2023
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
And add some patterns: