Skip to content

Commit 9f70e70

Browse files
authored
[LoongArch] Make ISD::FSQRT a legal operation with lsx/lasx feature (#74795)
And add some patterns: 1. (fdiv 1.0, vector) 2. (fdiv 1.0, (fsqrt vector))
1 parent 6c87a0a commit 9f70e70

File tree

7 files changed

+257
-0
lines changed

7 files changed

+257
-0
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
269269
setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
270270
setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
271271
setOperationAction(ISD::FMA, VT, Legal);
272+
setOperationAction(ISD::FSQRT, VT, Legal);
272273
setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
273274
ISD::SETUGE, ISD::SETUGT},
274275
VT, Expand);
@@ -309,6 +310,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
309310
setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
310311
setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
311312
setOperationAction(ISD::FMA, VT, Legal);
313+
setOperationAction(ISD::FSQRT, VT, Legal);
312314
setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
313315
ISD::SETUGE, ISD::SETUGT},
314316
VT, Expand);

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1092,6 +1092,13 @@ multiclass PatXr<SDPatternOperator OpNode, string Inst> {
10921092
(!cast<LAInst>(Inst#"_D") LASX256:$xj)>;
10931093
}
10941094

1095+
// Pattern helper for one-operand floating-point operators on 256-bit LASX
// vectors. For the given OpNode it emits one selection pattern per FP vector
// type, choosing the instruction whose name is Inst with a "_S" suffix for
// v8f32 and a "_D" suffix for v4f64. Both the operand and the result are
// constrained to the same vector type.
multiclass PatXrF<SDPatternOperator OpNode, string Inst> {
1096+
// Single precision: v8f32 in, v8f32 out.
def : Pat<(v8f32 (OpNode (v8f32 LASX256:$xj))),
1097+
(!cast<LAInst>(Inst#"_S") LASX256:$xj)>;
1098+
// Double precision: v4f64 in, v4f64 out.
def : Pat<(v4f64 (OpNode (v4f64 LASX256:$xj))),
1099+
(!cast<LAInst>(Inst#"_D") LASX256:$xj)>;
1100+
}
1101+
10951102
multiclass PatXrXr<SDPatternOperator OpNode, string Inst> {
10961103
def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)),
10971104
(!cast<LAInst>(Inst#"_B") LASX256:$xj, LASX256:$xk)>;
@@ -1448,6 +1455,21 @@ def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa),
14481455
def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa),
14491456
(XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
14501457

1458+
// XVFSQRT_{S/D}
1459+
defm : PatXrF<fsqrt, "XVFSQRT">;
1460+
1461+
// XVFRECIP_{S/D}
1462+
def : Pat<(fdiv vsplatf32_fpimm_eq_1, v8f32:$xj),
1463+
(XVFRECIP_S v8f32:$xj)>;
1464+
def : Pat<(fdiv vsplatf64_fpimm_eq_1, v4f64:$xj),
1465+
(XVFRECIP_D v4f64:$xj)>;
1466+
1467+
// XVFRSQRT_{S/D}
1468+
def : Pat<(fdiv vsplatf32_fpimm_eq_1, (fsqrt v8f32:$xj)),
1469+
(XVFRSQRT_S v8f32:$xj)>;
1470+
def : Pat<(fdiv vsplatf64_fpimm_eq_1, (fsqrt v4f64:$xj)),
1471+
(XVFRSQRT_D v4f64:$xj)>;
1472+
14511473
// XVSEQ[I]_{B/H/W/D}
14521474
defm : PatCCXrSimm5<SETEQ, "XVSEQI">;
14531475
defm : PatCCXrXr<SETEQ, "XVSEQ">;

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,29 @@ def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector),
9595
Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63;
9696
}]>;
9797

98+
// Operand-less pattern fragment that matches a bitconvert of a v4i32 or v8i32
// build_vector whose splat value has the bit pattern of single-precision +1.0.
def vsplatf32_fpimm_eq_1
99+
: PatFrags<(ops), [(bitconvert (v4i32 (build_vector))),
100+
(bitconvert (v8i32 (build_vector)))], [{
101+
APInt Imm;
102+
// Element type is taken from the matched node's result before N is rebound.
EVT EltTy = N->getValueType(0).getVectorElementType();
103+
// Step through the bitconvert to its operand (the build_vector alternative
// listed above).
N = N->getOperand(0).getNode();
104+
105+
// NOTE(review): selectVSplat is defined elsewhere; presumably it succeeds
// only for a constant splat and stores the splat value in Imm -- confirm.
// The splat must be exactly one element wide and equal the bits of +1.0f.
return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
106+
Imm.getBitWidth() == EltTy.getSizeInBits() &&
107+
Imm == APFloat(+1.0f).bitcastToAPInt();
108+
}]>;
109+
// Operand-less pattern fragment that matches a bitconvert of a v2i64 or v4i64
// build_vector whose splat value has the bit pattern of double-precision +1.0.
def vsplatf64_fpimm_eq_1
110+
: PatFrags<(ops), [(bitconvert (v2i64 (build_vector))),
111+
(bitconvert (v4i64 (build_vector)))], [{
112+
APInt Imm;
113+
// Element type is taken from the matched node's result before N is rebound.
EVT EltTy = N->getValueType(0).getVectorElementType();
114+
// Step through the bitconvert to its operand (the build_vector alternative
// listed above).
N = N->getOperand(0).getNode();
115+
116+
// NOTE(review): selectVSplat is defined elsewhere; presumably it succeeds
// only for a constant splat and stores the splat value in Imm -- confirm.
// The splat must be exactly one element wide and equal the bits of +1.0
// (a double literal, so the comparison is against the 64-bit encoding).
return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
117+
Imm.getBitWidth() == EltTy.getSizeInBits() &&
118+
Imm == APFloat(+1.0).bitcastToAPInt();
119+
}]>;
120+
98121
def vsplati8imm7 : PatFrag<(ops node:$reg),
99122
(and node:$reg, vsplati8_imm_eq_7)>;
100123
def vsplati16imm15 : PatFrag<(ops node:$reg),
@@ -1173,6 +1196,13 @@ multiclass PatVr<SDPatternOperator OpNode, string Inst> {
11731196
(!cast<LAInst>(Inst#"_D") LSX128:$vj)>;
11741197
}
11751198

1199+
// Pattern helper for one-operand floating-point operators on 128-bit LSX
// vectors. For the given OpNode it emits one selection pattern per FP vector
// type, choosing the instruction whose name is Inst with a "_S" suffix for
// v4f32 and a "_D" suffix for v2f64. Both the operand and the result are
// constrained to the same vector type.
multiclass PatVrF<SDPatternOperator OpNode, string Inst> {
1200+
// Single precision: v4f32 in, v4f32 out.
def : Pat<(v4f32 (OpNode (v4f32 LSX128:$vj))),
1201+
(!cast<LAInst>(Inst#"_S") LSX128:$vj)>;
1202+
// Double precision: v2f64 in, v2f64 out.
def : Pat<(v2f64 (OpNode (v2f64 LSX128:$vj))),
1203+
(!cast<LAInst>(Inst#"_D") LSX128:$vj)>;
1204+
}
1205+
11761206
multiclass PatVrVr<SDPatternOperator OpNode, string Inst> {
11771207
def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
11781208
(!cast<LAInst>(Inst#"_B") LSX128:$vj, LSX128:$vk)>;
@@ -1525,6 +1555,21 @@ def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va),
15251555
def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va),
15261556
(VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>;
15271557

1558+
// VFSQRT_{S/D}
1559+
defm : PatVrF<fsqrt, "VFSQRT">;
1560+
1561+
// VFRECIP_{S/D}
1562+
def : Pat<(fdiv vsplatf32_fpimm_eq_1, v4f32:$vj),
1563+
(VFRECIP_S v4f32:$vj)>;
1564+
def : Pat<(fdiv vsplatf64_fpimm_eq_1, v2f64:$vj),
1565+
(VFRECIP_D v2f64:$vj)>;
1566+
1567+
// VFRSQRT_{S/D}
1568+
def : Pat<(fdiv vsplatf32_fpimm_eq_1, (fsqrt v4f32:$vj)),
1569+
(VFRSQRT_S v4f32:$vj)>;
1570+
def : Pat<(fdiv vsplatf64_fpimm_eq_1, (fsqrt v2f64:$vj)),
1571+
(VFRSQRT_D v2f64:$vj)>;
1572+
15281573
// VSEQ[I]_{B/H/W/D}
15291574
defm : PatCCVrSimm5<SETEQ, "VSEQI">;
15301575
defm : PatCCVrVr<SETEQ, "VSEQ">;
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
3+
4+
;; fsqrt
5+
define void @sqrt_v8f32(ptr %res, ptr %a0) nounwind {
6+
; CHECK-LABEL: sqrt_v8f32:
7+
; CHECK: # %bb.0: # %entry
8+
; CHECK-NEXT: xvld $xr0, $a1, 0
9+
; CHECK-NEXT: xvfsqrt.s $xr0, $xr0
10+
; CHECK-NEXT: xvst $xr0, $a0, 0
11+
; CHECK-NEXT: ret
12+
entry:
13+
%v0 = load <8 x float>, ptr %a0, align 16
14+
%sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %v0)
15+
store <8 x float> %sqrt, ptr %res, align 16
16+
ret void
17+
}
18+
19+
define void @sqrt_v4f64(ptr %res, ptr %a0) nounwind {
20+
; CHECK-LABEL: sqrt_v4f64:
21+
; CHECK: # %bb.0: # %entry
22+
; CHECK-NEXT: xvld $xr0, $a1, 0
23+
; CHECK-NEXT: xvfsqrt.d $xr0, $xr0
24+
; CHECK-NEXT: xvst $xr0, $a0, 0
25+
; CHECK-NEXT: ret
26+
entry:
27+
%v0 = load <4 x double>, ptr %a0, align 16
28+
%sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %v0)
29+
store <4 x double> %sqrt, ptr %res, align 16
30+
ret void
31+
}
32+
33+
;; 1.0 / (fsqrt vec)
34+
define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind {
35+
; CHECK-LABEL: one_div_sqrt_v8f32:
36+
; CHECK: # %bb.0: # %entry
37+
; CHECK-NEXT: xvld $xr0, $a1, 0
38+
; CHECK-NEXT: xvfrsqrt.s $xr0, $xr0
39+
; CHECK-NEXT: xvst $xr0, $a0, 0
40+
; CHECK-NEXT: ret
41+
entry:
42+
%v0 = load <8 x float>, ptr %a0, align 16
43+
%sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %v0)
44+
%div = fdiv <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
45+
store <8 x float> %div, ptr %res, align 16
46+
ret void
47+
}
48+
49+
define void @one_div_sqrt_v4f64(ptr %res, ptr %a0) nounwind {
50+
; CHECK-LABEL: one_div_sqrt_v4f64:
51+
; CHECK: # %bb.0: # %entry
52+
; CHECK-NEXT: xvld $xr0, $a1, 0
53+
; CHECK-NEXT: xvfrsqrt.d $xr0, $xr0
54+
; CHECK-NEXT: xvst $xr0, $a0, 0
55+
; CHECK-NEXT: ret
56+
entry:
57+
%v0 = load <4 x double>, ptr %a0, align 16
58+
%sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %v0)
59+
%div = fdiv <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %sqrt
60+
store <4 x double> %div, ptr %res, align 16
61+
ret void
62+
}
63+
64+
declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
65+
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,32 @@ entry:
3232
store <4 x double> %v2, ptr %res
3333
ret void
3434
}
35+
36+
;; 1.0 / vec
37+
define void @one_fdiv_v8f32(ptr %res, ptr %a0) nounwind {
38+
; CHECK-LABEL: one_fdiv_v8f32:
39+
; CHECK: # %bb.0: # %entry
40+
; CHECK-NEXT: xvld $xr0, $a1, 0
41+
; CHECK-NEXT: xvfrecip.s $xr0, $xr0
42+
; CHECK-NEXT: xvst $xr0, $a0, 0
43+
; CHECK-NEXT: ret
44+
entry:
45+
%v0 = load <8 x float>, ptr %a0
46+
%div = fdiv <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %v0
47+
store <8 x float> %div, ptr %res
48+
ret void
49+
}
50+
51+
define void @one_fdiv_v4f64(ptr %res, ptr %a0) nounwind {
52+
; CHECK-LABEL: one_fdiv_v4f64:
53+
; CHECK: # %bb.0: # %entry
54+
; CHECK-NEXT: xvld $xr0, $a1, 0
55+
; CHECK-NEXT: xvfrecip.d $xr0, $xr0
56+
; CHECK-NEXT: xvst $xr0, $a0, 0
57+
; CHECK-NEXT: ret
58+
entry:
59+
%v0 = load <4 x double>, ptr %a0
60+
%div = fdiv <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %v0
61+
store <4 x double> %div, ptr %res
62+
ret void
63+
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
3+
4+
;; fsqrt
5+
define void @sqrt_v4f32(ptr %res, ptr %a0) nounwind {
6+
; CHECK-LABEL: sqrt_v4f32:
7+
; CHECK: # %bb.0: # %entry
8+
; CHECK-NEXT: vld $vr0, $a1, 0
9+
; CHECK-NEXT: vfsqrt.s $vr0, $vr0
10+
; CHECK-NEXT: vst $vr0, $a0, 0
11+
; CHECK-NEXT: ret
12+
entry:
13+
%v0 = load <4 x float>, ptr %a0, align 16
14+
%sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %v0)
15+
store <4 x float> %sqrt, ptr %res, align 16
16+
ret void
17+
}
18+
19+
define void @sqrt_v2f64(ptr %res, ptr %a0) nounwind {
20+
; CHECK-LABEL: sqrt_v2f64:
21+
; CHECK: # %bb.0: # %entry
22+
; CHECK-NEXT: vld $vr0, $a1, 0
23+
; CHECK-NEXT: vfsqrt.d $vr0, $vr0
24+
; CHECK-NEXT: vst $vr0, $a0, 0
25+
; CHECK-NEXT: ret
26+
entry:
27+
%v0 = load <2 x double>, ptr %a0, align 16
28+
%sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %v0)
29+
store <2 x double> %sqrt, ptr %res, align 16
30+
ret void
31+
}
32+
33+
;; 1.0 / (fsqrt vec)
34+
define void @one_div_sqrt_v4f32(ptr %res, ptr %a0) nounwind {
35+
; CHECK-LABEL: one_div_sqrt_v4f32:
36+
; CHECK: # %bb.0: # %entry
37+
; CHECK-NEXT: vld $vr0, $a1, 0
38+
; CHECK-NEXT: vfrsqrt.s $vr0, $vr0
39+
; CHECK-NEXT: vst $vr0, $a0, 0
40+
; CHECK-NEXT: ret
41+
entry:
42+
%v0 = load <4 x float>, ptr %a0, align 16
43+
%sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %v0)
44+
%div = fdiv <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
45+
store <4 x float> %div, ptr %res, align 16
46+
ret void
47+
}
48+
49+
define void @one_div_sqrt_v2f64(ptr %res, ptr %a0) nounwind {
50+
; CHECK-LABEL: one_div_sqrt_v2f64:
51+
; CHECK: # %bb.0: # %entry
52+
; CHECK-NEXT: vld $vr0, $a1, 0
53+
; CHECK-NEXT: vfrsqrt.d $vr0, $vr0
54+
; CHECK-NEXT: vst $vr0, $a0, 0
55+
; CHECK-NEXT: ret
56+
entry:
57+
%v0 = load <2 x double>, ptr %a0, align 16
58+
%sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %v0)
59+
%div = fdiv <2 x double> <double 1.0, double 1.0>, %sqrt
60+
store <2 x double> %div, ptr %res, align 16
61+
ret void
62+
}
63+
64+
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
65+
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)

llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,32 @@ entry:
3232
store <2 x double> %v2, ptr %res
3333
ret void
3434
}
35+
36+
;; 1.0 / vec
37+
define void @one_fdiv_v4f32(ptr %res, ptr %a0) nounwind {
38+
; CHECK-LABEL: one_fdiv_v4f32:
39+
; CHECK: # %bb.0: # %entry
40+
; CHECK-NEXT: vld $vr0, $a1, 0
41+
; CHECK-NEXT: vfrecip.s $vr0, $vr0
42+
; CHECK-NEXT: vst $vr0, $a0, 0
43+
; CHECK-NEXT: ret
44+
entry:
45+
%v0 = load <4 x float>, ptr %a0
46+
%div = fdiv <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %v0
47+
store <4 x float> %div, ptr %res
48+
ret void
49+
}
50+
51+
define void @one_fdiv_v2f64(ptr %res, ptr %a0) nounwind {
52+
; CHECK-LABEL: one_fdiv_v2f64:
53+
; CHECK: # %bb.0: # %entry
54+
; CHECK-NEXT: vld $vr0, $a1, 0
55+
; CHECK-NEXT: vfrecip.d $vr0, $vr0
56+
; CHECK-NEXT: vst $vr0, $a0, 0
57+
; CHECK-NEXT: ret
58+
entry:
59+
%v0 = load <2 x double>, ptr %a0
60+
%div = fdiv <2 x double> <double 1.0, double 1.0>, %v0
61+
store <2 x double> %div, ptr %res
62+
ret void
63+
}

0 commit comments

Comments
 (0)