Skip to content

[SelectionDAG] Lower llvm.ldexp.f32 to ldexp() on Windows. #95301

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1778,6 +1778,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
IsStrictFPEnabled = true;
setMaxAtomicSizeInBitsSupported(128);

// On MSVC (both 32-bit and 64-bit), ldexpf(f32) is not defined, so promote
// the f32 FLDEXP/STRICT_FLDEXP/FFREXP nodes and use the wider routine
// instead. MinGW has ldexpf, but it's just a wrapper around ldexp.
if (Subtarget->isTargetWindows()) {
for (ISD::NodeType Op : {ISD::FLDEXP, ISD::STRICT_FLDEXP, ISD::FFREXP})
if (isOperationExpand(Op, MVT::f32))
setOperationAction(Op, MVT::f32, Promote);
}

// LegalizeDAG currently can't expand fp16 LDEXP/FREXP on targets where i16
// isn't legal.
for (ISD::NodeType Op : {ISD::FLDEXP, ISD::STRICT_FLDEXP, ISD::FFREXP})
if (isOperationExpand(Op, MVT::f16))
setOperationAction(Op, MVT::f16, Promote);

if (Subtarget->isWindowsArm64EC()) {
// FIXME: are there intrinsics we need to exclude from this?
for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1578,6 +1578,20 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
}

// On MSVC (both 32-bit and 64-bit), ldexpf(f32) is not defined, so promote
// the f32 FLDEXP/STRICT_FLDEXP/FFREXP nodes and use the wider routine
// instead. MinGW has ldexpf, but it's just a wrapper around ldexp.
if (Subtarget->isTargetWindows()) {
for (ISD::NodeType Op : {ISD::FLDEXP, ISD::STRICT_FLDEXP, ISD::FFREXP})
if (isOperationExpand(Op, MVT::f32))
setOperationAction(Op, MVT::f32, Promote);
}

// LegalizeDAG currently can't expand fp16 LDEXP/FREXP on targets where i16
// isn't legal.
for (ISD::NodeType Op : {ISD::FLDEXP, ISD::STRICT_FLDEXP, ISD::FFREXP})
if (isOperationExpand(Op, MVT::f16))
setOperationAction(Op, MVT::f16, Promote);

// We have target-specific dag combine patterns for the following nodes:
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
setTargetDAGCombine(
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2486,6 +2486,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (isOperationExpand(Op, MVT::f32))
setOperationAction(Op, MVT::f32, Promote);

// On MSVC (both 32-bit and 64-bit), ldexpf(f32) is not defined, so promote
// the f32 FLDEXP/STRICT_FLDEXP/FFREXP nodes and use the wider routine
// instead. MinGW has ldexpf, but it's just a wrapper around ldexp.
if (Subtarget.isOSWindows()) {
for (ISD::NodeType Op : {ISD::FLDEXP, ISD::STRICT_FLDEXP, ISD::FFREXP})
if (isOperationExpand(Op, MVT::f32))
setOperationAction(Op, MVT::f32, Promote);
}

// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
ISD::SCALAR_TO_VECTOR,
Expand Down
147 changes: 117 additions & 30 deletions llvm/test/CodeGen/AArch64/ldexp.ll
Original file line number Diff line number Diff line change
@@ -1,44 +1,114 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=aarch64 -mattr=+sve < %s -o - | FileCheck %s
; RUN: llc -mtriple=aarch64 -mattr=+sve < %s -o - | FileCheck -check-prefixes=SVE,SVELINUX %s
; RUN: llc -mtriple=aarch64-windows-msvc -mattr=+sve < %s -o - | FileCheck -check-prefixes=SVE,SVEWINDOWS %s
; RUN: llc -mtriple=aarch64-windows-msvc < %s -o - | FileCheck -check-prefixes=WINDOWS %s

; Direct call to the library function ldexp(double, i32).
; The SVE runs expect an inline fscale sequence; the WINDOWS run (no +sve)
; expects a branch to ldexp.
define double @testExp(double %val, i32 %a) {
; CHECK-LABEL: testExp:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: fscale z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
; SVE-LABEL: testExp:
; SVE: // %bb.0: // %entry
; SVE-NEXT: // kill: def $w0 killed $w0 def $x0
; SVE-NEXT: sxtw x8, w0
; SVE-NEXT: ptrue p0.d
; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
; SVE-NEXT: fmov d1, x8
; SVE-NEXT: fscale z0.d, p0/m, z0.d, z1.d
; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT: ret
;
; WINDOWS-LABEL: testExp:
; WINDOWS: // %bb.0: // %entry
; WINDOWS-NEXT: b ldexp
entry:
%call = tail call fast double @ldexp(double %val, i32 %a)
ret double %call
}

declare double @ldexp(double, i32) memory(none)

; Same operation as testExp, but expressed through the llvm.ldexp.f64
; intrinsic. The expected output matches the direct-call case: fscale for the
; SVE runs, a branch to ldexp for the WINDOWS run.
define double @testExpIntrinsic(double %val, i32 %a) {
; SVE-LABEL: testExpIntrinsic:
; SVE: // %bb.0: // %entry
; SVE-NEXT: // kill: def $w0 killed $w0 def $x0
; SVE-NEXT: sxtw x8, w0
; SVE-NEXT: ptrue p0.d
; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
; SVE-NEXT: fmov d1, x8
; SVE-NEXT: fscale z0.d, p0/m, z0.d, z1.d
; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT: ret
;
; WINDOWS-LABEL: testExpIntrinsic:
; WINDOWS: // %bb.0: // %entry
; WINDOWS-NEXT: b ldexp
entry:
%call = tail call fast double @llvm.ldexp.f64(double %val, i32 %a)
ret double %call
}

; Direct call to the library function ldexpf(float, i32).
; SVELINUX expects an inline fscale sequence; both Windows runs (with and
; without +sve) expect the explicit call to be kept as a branch to ldexpf.
define float @testExpf(float %val, i32 %a) {
; CHECK-LABEL: testExpf:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmov s1, w0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: fscale z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
; SVELINUX-LABEL: testExpf:
; SVELINUX: // %bb.0: // %entry
; SVELINUX-NEXT: fmov s1, w0
; SVELINUX-NEXT: ptrue p0.s
; SVELINUX-NEXT: // kill: def $s0 killed $s0 def $z0
; SVELINUX-NEXT: fscale z0.s, p0/m, z0.s, z1.s
; SVELINUX-NEXT: // kill: def $s0 killed $s0 killed $z0
; SVELINUX-NEXT: ret
;
; SVEWINDOWS-LABEL: testExpf:
; SVEWINDOWS: // %bb.0: // %entry
; SVEWINDOWS-NEXT: b ldexpf
;
; WINDOWS-LABEL: testExpf:
; WINDOWS: // %bb.0: // %entry
; WINDOWS-NEXT: b ldexpf
entry:
%call = tail call fast float @ldexpf(float %val, i32 %a)
ret float %call
}

; llvm.ldexp.f32 intrinsic. The SVE runs expect an inline fscale sequence.
; The WINDOWS run (no +sve) expects the f32 operation to be promoted: the
; input is converted to double, ldexp is called, and the result is converted
; back to float, rather than calling ldexpf.
define float @testExpfIntrinsic(float %val, i32 %a) {
; SVE-LABEL: testExpfIntrinsic:
; SVE: // %bb.0: // %entry
; SVE-NEXT: fmov s1, w0
; SVE-NEXT: ptrue p0.s
; SVE-NEXT: // kill: def $s0 killed $s0 def $z0
; SVE-NEXT: fscale z0.s, p0/m, z0.s, z1.s
; SVE-NEXT: // kill: def $s0 killed $s0 killed $z0
; SVE-NEXT: ret
;
; WINDOWS-LABEL: testExpfIntrinsic:
; WINDOWS: .seh_proc testExpfIntrinsic
; WINDOWS-NEXT: // %bb.0: // %entry
; WINDOWS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; WINDOWS-NEXT: .seh_save_reg_x x30, 16
; WINDOWS-NEXT: .seh_endprologue
; WINDOWS-NEXT: fcvt d0, s0
; WINDOWS-NEXT: bl ldexp
; WINDOWS-NEXT: fcvt s0, d0
; WINDOWS-NEXT: .seh_startepilogue
; WINDOWS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; WINDOWS-NEXT: .seh_save_reg_x x30, 16
; WINDOWS-NEXT: .seh_endepilogue
; WINDOWS-NEXT: ret
; WINDOWS-NEXT: .seh_endfunclet
; WINDOWS-NEXT: .seh_endproc
entry:
%call = tail call fast float @llvm.ldexp.f32(float %val, i32 %a)
ret float %call
}


declare float @ldexpf(float, i32) memory(none)

define fp128 @testExpl(fp128 %val, i32 %a) {
; CHECK-LABEL: testExpl:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: b ldexpl
; SVE-LABEL: testExpl:
; SVE: // %bb.0: // %entry
; SVE-NEXT: b ldexpl
;
; WINDOWS-LABEL: testExpl:
; WINDOWS: // %bb.0: // %entry
; WINDOWS-NEXT: b ldexpl
entry:
%call = tail call fast fp128 @ldexpl(fp128 %val, i32 %a)
ret fp128 %call
Expand All @@ -47,14 +117,31 @@ entry:
declare fp128 @ldexpl(fp128, i32) memory(none)

define half @testExpf16(half %val, i32 %a) {
; CHECK-LABEL: testExpf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: fmov s1, w0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fscale z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ret
; SVE-LABEL: testExpf16:
; SVE: // %bb.0: // %entry
; SVE-NEXT: fcvt s0, h0
; SVE-NEXT: fmov s1, w0
; SVE-NEXT: ptrue p0.s
; SVE-NEXT: fscale z0.s, p0/m, z0.s, z1.s
; SVE-NEXT: fcvt h0, s0
; SVE-NEXT: ret
;
; WINDOWS-LABEL: testExpf16:
; WINDOWS: .seh_proc testExpf16
; WINDOWS-NEXT: // %bb.0: // %entry
; WINDOWS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; WINDOWS-NEXT: .seh_save_reg_x x30, 16
; WINDOWS-NEXT: .seh_endprologue
; WINDOWS-NEXT: fcvt d0, h0
; WINDOWS-NEXT: bl ldexp
; WINDOWS-NEXT: fcvt h0, d0
; WINDOWS-NEXT: .seh_startepilogue
; WINDOWS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; WINDOWS-NEXT: .seh_save_reg_x x30, 16
; WINDOWS-NEXT: .seh_endepilogue
; WINDOWS-NEXT: ret
; WINDOWS-NEXT: .seh_endfunclet
; WINDOWS-NEXT: .seh_endproc
entry:
%0 = tail call fast half @llvm.ldexp.f16.i32(half %val, i32 %a)
ret half %0
Expand Down
58 changes: 58 additions & 0 deletions llvm/test/CodeGen/ARM/ldexp.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
; RUN: llc -mtriple=armv7-linux < %s -o - | FileCheck -check-prefix=LINUX %s
; RUN: llc -mtriple=thumbv7-windows-msvc -mattr=+thumb-mode < %s -o - | FileCheck -check-prefix=WINDOWS %s

; Direct call to the library function ldexp(double, i32): both targets are
; expected to branch to ldexp. The -LABEL lines anchor the checks to this
; function so they cannot match output from a later one.
define double @testExp(double %val, i32 %a) {
; LINUX-LABEL: testExp:
; LINUX: b ldexp{{$}}
; WINDOWS-LABEL: testExp:
; WINDOWS: b.w ldexp{{$}}
entry:
%call = tail call fast double @ldexp(double %val, i32 %a)
ret double %call
}

declare double @ldexp(double, i32) memory(none)

; llvm.ldexp.f64 intrinsic: expected to lower to the same branch to ldexp as
; the direct call. The -LABEL lines anchor the checks to this function.
define double @testExpIntrinsic(double %val, i32 %a) {
; LINUX-LABEL: testExpIntrinsic:
; LINUX: b ldexp{{$}}
; WINDOWS-LABEL: testExpIntrinsic:
; WINDOWS: b.w ldexp{{$}}
entry:
%call = tail call fast double @llvm.ldexp.f64(double %val, i32 %a)
ret double %call
}

; Direct call to the library function ldexpf(float, i32): the explicit call is
; expected to be kept as a branch to ldexpf on both targets. The -LABEL lines
; anchor the checks to this function.
define float @testExpf(float %val, i32 %a) {
; LINUX-LABEL: testExpf:
; LINUX: b ldexpf
; WINDOWS-LABEL: testExpf:
; WINDOWS: b.w ldexpf
entry:
%call = tail call fast float @ldexpf(float %val, i32 %a)
ret float %call
}

; llvm.ldexp.f32 intrinsic: Linux is expected to branch to ldexpf, while
; Windows is expected to call the double-precision ldexp instead (the {{$}}
; rules out ldexpf). The -LABEL lines anchor the checks to this function.
define float @testExpfIntrinsic(float %val, i32 %a) {
; LINUX-LABEL: testExpfIntrinsic:
; LINUX: b ldexpf
; WINDOWS-LABEL: testExpfIntrinsic:
; WINDOWS: bl ldexp{{$}}
entry:
%call = tail call fast float @llvm.ldexp.f32(float %val, i32 %a)
ret float %call
}

declare float @ldexpf(float, i32) memory(none)

; Direct call to the library function ldexpl(fp128, i32): both targets are
; expected to reference ldexpl. The -LABEL lines anchor the checks to this
; function.
define fp128 @testExpl(fp128 %val, i32 %a) {
; LINUX-LABEL: testExpl:
; LINUX: bl ldexpl
; WINDOWS-LABEL: testExpl:
; WINDOWS: b.w ldexpl
entry:
%call = tail call fast fp128 @ldexpl(fp128 %val, i32 %a)
ret fp128 %call
}

declare fp128 @ldexpl(fp128, i32) memory(none)

; llvm.ldexp.f16 intrinsic: Linux is expected to call ldexpf, while Windows is
; expected to call the double-precision ldexp (the {{$}} rules out ldexpf).
; The -LABEL lines anchor the checks to this function.
define half @testExpf16(half %val, i32 %a) {
; LINUX-LABEL: testExpf16:
; LINUX: bl ldexpf
; WINDOWS-LABEL: testExpf16:
; WINDOWS: bl ldexp{{$}}
entry:
%0 = tail call fast half @llvm.ldexp.f16.i32(half %val, i32 %a)
ret half %0
}

declare half @llvm.ldexp.f16.i32(half, i32) memory(none)
Loading
Loading