Skip to content

Commit 39a0aa5

Browse files
[SelectionDAG] Lower llvm.ldexp.f32 to ldexp() on Windows. (#95301)
This reduces code size, as discussed in #92707.
1 parent c0dc134 commit 39a0aa5

File tree

7 files changed

+645
-1238
lines changed

7 files changed

+645
-1238
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1786,6 +1786,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
17861786
IsStrictFPEnabled = true;
17871787
setMaxAtomicSizeInBitsSupported(128);
17881788

1789+
// On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has
1790+
// it, but it's just a wrapper around ldexp.
1791+
if (Subtarget->isTargetWindows()) {
1792+
for (ISD::NodeType Op : {ISD::FLDEXP, ISD::STRICT_FLDEXP, ISD::FFREXP})
1793+
if (isOperationExpand(Op, MVT::f32))
1794+
setOperationAction(Op, MVT::f32, Promote);
1795+
}
1796+
1797+
// LegalizeDAG currently can't expand fp16 LDEXP/FREXP on targets where i16
1798+
// isn't legal.
1799+
for (ISD::NodeType Op : {ISD::FLDEXP, ISD::STRICT_FLDEXP, ISD::FFREXP})
1800+
if (isOperationExpand(Op, MVT::f16))
1801+
setOperationAction(Op, MVT::f16, Promote);
1802+
17891803
if (Subtarget->isWindowsArm64EC()) {
17901804
// FIXME: are there intrinsics we need to exclude from this?
17911805
for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1594,6 +1594,20 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
15941594
}
15951595
}
15961596

1597+
// On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has
1598+
// it, but it's just a wrapper around ldexp.
1599+
if (Subtarget->isTargetWindows()) {
1600+
for (ISD::NodeType Op : {ISD::FLDEXP, ISD::STRICT_FLDEXP, ISD::FFREXP})
1601+
if (isOperationExpand(Op, MVT::f32))
1602+
setOperationAction(Op, MVT::f32, Promote);
1603+
}
1604+
1605+
// LegalizeDAG currently can't expand fp16 LDEXP/FREXP on targets where i16
1606+
// isn't legal.
1607+
for (ISD::NodeType Op : {ISD::FLDEXP, ISD::STRICT_FLDEXP, ISD::FFREXP})
1608+
if (isOperationExpand(Op, MVT::f16))
1609+
setOperationAction(Op, MVT::f16, Promote);
1610+
15971611
// We have target-specific dag combine patterns for the following nodes:
15981612
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
15991613
setTargetDAGCombine(

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2500,6 +2500,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
25002500
setOperationAction(Op, MVT::f32, Promote);
25012501
// clang-format on
25022502

2503+
// On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has
2504+
// it, but it's just a wrapper around ldexp.
2505+
if (Subtarget.isOSWindows()) {
2506+
for (ISD::NodeType Op : {ISD::FLDEXP, ISD::STRICT_FLDEXP, ISD::FFREXP})
2507+
if (isOperationExpand(Op, MVT::f32))
2508+
setOperationAction(Op, MVT::f32, Promote);
2509+
}
2510+
25032511
// We have target-specific dag combine patterns for the following nodes:
25042512
setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
25052513
ISD::SCALAR_TO_VECTOR,

llvm/test/CodeGen/AArch64/ldexp.ll

Lines changed: 117 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,114 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2-
; RUN: llc -mtriple=aarch64 -mattr=+sve < %s -o - | FileCheck %s
2+
; RUN: llc -mtriple=aarch64 -mattr=+sve < %s -o - | FileCheck -check-prefixes=SVE,SVELINUX %s
3+
; RUN: llc -mtriple=aarch64-windows-msvc -mattr=+sve < %s -o - | FileCheck -check-prefixes=SVE,SVEWINDOWS %s
4+
; RUN: llc -mtriple=aarch64-windows-msvc < %s -o - | FileCheck -check-prefixes=WINDOWS %s
35

46
define double @testExp(double %val, i32 %a) {
5-
; CHECK-LABEL: testExp:
6-
; CHECK: // %bb.0: // %entry
7-
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
8-
; CHECK-NEXT: sxtw x8, w0
9-
; CHECK-NEXT: ptrue p0.d
10-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
11-
; CHECK-NEXT: fmov d1, x8
12-
; CHECK-NEXT: fscale z0.d, p0/m, z0.d, z1.d
13-
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
14-
; CHECK-NEXT: ret
7+
; SVE-LABEL: testExp:
8+
; SVE: // %bb.0: // %entry
9+
; SVE-NEXT: // kill: def $w0 killed $w0 def $x0
10+
; SVE-NEXT: sxtw x8, w0
11+
; SVE-NEXT: ptrue p0.d
12+
; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
13+
; SVE-NEXT: fmov d1, x8
14+
; SVE-NEXT: fscale z0.d, p0/m, z0.d, z1.d
15+
; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
16+
; SVE-NEXT: ret
17+
;
18+
; WINDOWS-LABEL: testExp:
19+
; WINDOWS: // %bb.0: // %entry
20+
; WINDOWS-NEXT: b ldexp
1521
entry:
1622
%call = tail call fast double @ldexp(double %val, i32 %a)
1723
ret double %call
1824
}
1925

2026
declare double @ldexp(double, i32) memory(none)
2127

28+
define double @testExpIntrinsic(double %val, i32 %a) {
29+
; SVE-LABEL: testExpIntrinsic:
30+
; SVE: // %bb.0: // %entry
31+
; SVE-NEXT: // kill: def $w0 killed $w0 def $x0
32+
; SVE-NEXT: sxtw x8, w0
33+
; SVE-NEXT: ptrue p0.d
34+
; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
35+
; SVE-NEXT: fmov d1, x8
36+
; SVE-NEXT: fscale z0.d, p0/m, z0.d, z1.d
37+
; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
38+
; SVE-NEXT: ret
39+
;
40+
; WINDOWS-LABEL: testExpIntrinsic:
41+
; WINDOWS: // %bb.0: // %entry
42+
; WINDOWS-NEXT: b ldexp
43+
entry:
44+
%call = tail call fast double @llvm.ldexp.f64(double %val, i32 %a)
45+
ret double %call
46+
}
47+
2248
define float @testExpf(float %val, i32 %a) {
23-
; CHECK-LABEL: testExpf:
24-
; CHECK: // %bb.0: // %entry
25-
; CHECK-NEXT: fmov s1, w0
26-
; CHECK-NEXT: ptrue p0.s
27-
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
28-
; CHECK-NEXT: fscale z0.s, p0/m, z0.s, z1.s
29-
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
30-
; CHECK-NEXT: ret
49+
; SVELINUX-LABEL: testExpf:
50+
; SVELINUX: // %bb.0: // %entry
51+
; SVELINUX-NEXT: fmov s1, w0
52+
; SVELINUX-NEXT: ptrue p0.s
53+
; SVELINUX-NEXT: // kill: def $s0 killed $s0 def $z0
54+
; SVELINUX-NEXT: fscale z0.s, p0/m, z0.s, z1.s
55+
; SVELINUX-NEXT: // kill: def $s0 killed $s0 killed $z0
56+
; SVELINUX-NEXT: ret
57+
;
58+
; SVEWINDOWS-LABEL: testExpf:
59+
; SVEWINDOWS: // %bb.0: // %entry
60+
; SVEWINDOWS-NEXT: b ldexpf
61+
;
62+
; WINDOWS-LABEL: testExpf:
63+
; WINDOWS: // %bb.0: // %entry
64+
; WINDOWS-NEXT: b ldexpf
3165
entry:
3266
%call = tail call fast float @ldexpf(float %val, i32 %a)
3367
ret float %call
3468
}
3569

70+
define float @testExpfIntrinsic(float %val, i32 %a) {
71+
; SVE-LABEL: testExpfIntrinsic:
72+
; SVE: // %bb.0: // %entry
73+
; SVE-NEXT: fmov s1, w0
74+
; SVE-NEXT: ptrue p0.s
75+
; SVE-NEXT: // kill: def $s0 killed $s0 def $z0
76+
; SVE-NEXT: fscale z0.s, p0/m, z0.s, z1.s
77+
; SVE-NEXT: // kill: def $s0 killed $s0 killed $z0
78+
; SVE-NEXT: ret
79+
;
80+
; WINDOWS-LABEL: testExpfIntrinsic:
81+
; WINDOWS: .seh_proc testExpfIntrinsic
82+
; WINDOWS-NEXT: // %bb.0: // %entry
83+
; WINDOWS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
84+
; WINDOWS-NEXT: .seh_save_reg_x x30, 16
85+
; WINDOWS-NEXT: .seh_endprologue
86+
; WINDOWS-NEXT: fcvt d0, s0
87+
; WINDOWS-NEXT: bl ldexp
88+
; WINDOWS-NEXT: fcvt s0, d0
89+
; WINDOWS-NEXT: .seh_startepilogue
90+
; WINDOWS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
91+
; WINDOWS-NEXT: .seh_save_reg_x x30, 16
92+
; WINDOWS-NEXT: .seh_endepilogue
93+
; WINDOWS-NEXT: ret
94+
; WINDOWS-NEXT: .seh_endfunclet
95+
; WINDOWS-NEXT: .seh_endproc
96+
entry:
97+
%call = tail call fast float @llvm.ldexp.f32(float %val, i32 %a)
98+
ret float %call
99+
}
100+
101+
36102
declare float @ldexpf(float, i32) memory(none)
37103

38104
define fp128 @testExpl(fp128 %val, i32 %a) {
39-
; CHECK-LABEL: testExpl:
40-
; CHECK: // %bb.0: // %entry
41-
; CHECK-NEXT: b ldexpl
105+
; SVE-LABEL: testExpl:
106+
; SVE: // %bb.0: // %entry
107+
; SVE-NEXT: b ldexpl
108+
;
109+
; WINDOWS-LABEL: testExpl:
110+
; WINDOWS: // %bb.0: // %entry
111+
; WINDOWS-NEXT: b ldexpl
42112
entry:
43113
%call = tail call fast fp128 @ldexpl(fp128 %val, i32 %a)
44114
ret fp128 %call
@@ -47,14 +117,31 @@ entry:
47117
declare fp128 @ldexpl(fp128, i32) memory(none)
48118

49119
define half @testExpf16(half %val, i32 %a) {
50-
; CHECK-LABEL: testExpf16:
51-
; CHECK: // %bb.0: // %entry
52-
; CHECK-NEXT: fcvt s0, h0
53-
; CHECK-NEXT: fmov s1, w0
54-
; CHECK-NEXT: ptrue p0.s
55-
; CHECK-NEXT: fscale z0.s, p0/m, z0.s, z1.s
56-
; CHECK-NEXT: fcvt h0, s0
57-
; CHECK-NEXT: ret
120+
; SVE-LABEL: testExpf16:
121+
; SVE: // %bb.0: // %entry
122+
; SVE-NEXT: fcvt s0, h0
123+
; SVE-NEXT: fmov s1, w0
124+
; SVE-NEXT: ptrue p0.s
125+
; SVE-NEXT: fscale z0.s, p0/m, z0.s, z1.s
126+
; SVE-NEXT: fcvt h0, s0
127+
; SVE-NEXT: ret
128+
;
129+
; WINDOWS-LABEL: testExpf16:
130+
; WINDOWS: .seh_proc testExpf16
131+
; WINDOWS-NEXT: // %bb.0: // %entry
132+
; WINDOWS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
133+
; WINDOWS-NEXT: .seh_save_reg_x x30, 16
134+
; WINDOWS-NEXT: .seh_endprologue
135+
; WINDOWS-NEXT: fcvt d0, h0
136+
; WINDOWS-NEXT: bl ldexp
137+
; WINDOWS-NEXT: fcvt h0, d0
138+
; WINDOWS-NEXT: .seh_startepilogue
139+
; WINDOWS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
140+
; WINDOWS-NEXT: .seh_save_reg_x x30, 16
141+
; WINDOWS-NEXT: .seh_endepilogue
142+
; WINDOWS-NEXT: ret
143+
; WINDOWS-NEXT: .seh_endfunclet
144+
; WINDOWS-NEXT: .seh_endproc
58145
entry:
59146
%0 = tail call fast half @llvm.ldexp.f16.i32(half %val, i32 %a)
60147
ret half %0

llvm/test/CodeGen/ARM/ldexp.ll

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
; RUN: llc -mtriple=armv7-linux < %s -o - | FileCheck -check-prefix=LINUX %s
2+
; RUN: llc -mtriple=thumbv7-windows-msvc -mattr=+thumb-mode < %s -o - | FileCheck -check-prefix=WINDOWS %s
3+
4+
define double @testExp(double %val, i32 %a) {
5+
; LINUX: b ldexp{{$}}
6+
; WINDOWS: b.w ldexp{{$}}
7+
entry:
8+
%call = tail call fast double @ldexp(double %val, i32 %a)
9+
ret double %call
10+
}
11+
12+
declare double @ldexp(double, i32) memory(none)
13+
14+
define double @testExpIntrinsic(double %val, i32 %a) {
15+
; LINUX: b ldexp{{$}}
16+
; WINDOWS: b.w ldexp{{$}}
17+
entry:
18+
%call = tail call fast double @llvm.ldexp.f64(double %val, i32 %a)
19+
ret double %call
20+
}
21+
22+
define float @testExpf(float %val, i32 %a) {
23+
; LINUX: b ldexpf
24+
; WINDOWS: b.w ldexpf
25+
entry:
26+
%call = tail call fast float @ldexpf(float %val, i32 %a)
27+
ret float %call
28+
}
29+
30+
define float @testExpfIntrinsic(float %val, i32 %a) {
31+
; LINUX: b ldexpf
32+
; WINDOWS: bl ldexp{{$}}
33+
entry:
34+
%call = tail call fast float @llvm.ldexp.f32(float %val, i32 %a)
35+
ret float %call
36+
}
37+
38+
declare float @ldexpf(float, i32) memory(none)
39+
40+
define fp128 @testExpl(fp128 %val, i32 %a) {
41+
; LINUX: bl ldexpl
42+
; WINDOWS: b.w ldexpl
43+
entry:
44+
%call = tail call fast fp128 @ldexpl(fp128 %val, i32 %a)
45+
ret fp128 %call
46+
}
47+
48+
declare fp128 @ldexpl(fp128, i32) memory(none)
49+
50+
define half @testExpf16(half %val, i32 %a) {
51+
; LINUX: bl ldexpf
52+
; WINDOWS: bl ldexp{{$}}
53+
entry:
54+
%0 = tail call fast half @llvm.ldexp.f16.i32(half %val, i32 %a)
55+
ret half %0
56+
}
57+
58+
declare half @llvm.ldexp.f16.i32(half, i32) memory(none)

0 commit comments

Comments
 (0)