-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[SelectionDAG] Lower llvm.ldexp.f32 to ldexp() on Windows. #95301
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This reduces code size, as discussed in llvm#92707.
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-selectiondag Author: Eli Friedman (efriedma-quic) Changes: This reduces code size, as discussed in #92707. Patch is 34.12 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/95301.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 8cd2bb60d81f2..1d9f2fe65e6fb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3650,6 +3650,27 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// FIXME: Use separate LibCall action.
if (TLI.getLibcallName(LC))
break;
+ if (Node->getOpcode() == ISD::FLDEXP && VT == MVT::f32 &&
+ TLI.isTypeLegal(MVT::f64) &&
+ TLI.getLibcallName(RTLIB::getLDEXP(MVT::f64))) {
+ // On Windows, it's common to be missing the 32-bit libcall, but have
+ // the 64-bit libcall. Expand to the 64-bit libcall. (Note that ldexp
+ // involves a rounding step if the result is subnormal, but that isn't
+ // relevant here because any subnormal result will round to zero when
+ // it's truncated.)
+ //
+ // FIXME: Consider doing something similar for f16/bf16. But be very
+ // careful handling bf16: expanding bf16->f64 is fine, but expanding
+ // bf16->f32 would produce incorrect subnormal results.
+ SDValue Extended =
+ DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Node->getOperand(0));
+ SDValue LdExp =
+ DAG.getNode(ISD::FLDEXP, dl, MVT::f64, Extended, Node->getOperand(1));
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, VT, LdExp,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
+ break;
+ }
if (SDValue Expanded = expandLdexp(Node)) {
Results.push_back(Expanded);
diff --git a/llvm/test/CodeGen/X86/ldexp.ll b/llvm/test/CodeGen/X86/ldexp.ll
index 2be5dec156690..d3b02dc8f9b7c 100644
--- a/llvm/test/CodeGen/X86/ldexp.ll
+++ b/llvm/test/CodeGen/X86/ldexp.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=x86_64-unknown-unknown -verify-machineinstrs < %s | FileCheck -check-prefixes=X64 %s
+; RUN: llc -mtriple=x86_64-pc-win32 -verify-machineinstrs < %s | FileCheck -check-prefixes=WIN64 %s
; RUN: llc -mtriple=i386-pc-win32 -verify-machineinstrs < %s | FileCheck -check-prefix=WIN32 %s
define float @ldexp_f32(i8 zeroext %x) {
@@ -8,75 +9,30 @@ define float @ldexp_f32(i8 zeroext %x) {
; X64-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-NEXT: jmp ldexpf@PLT # TAILCALL
;
+; WIN64-LABEL: ldexp_f32:
+; WIN64: # %bb.0:
+; WIN64-NEXT: subq $40, %rsp
+; WIN64-NEXT: .seh_stackalloc 40
+; WIN64-NEXT: .seh_endprologue
+; WIN64-NEXT: movzbl %cl, %edx
+; WIN64-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
+; WIN64-NEXT: callq ldexp
+; WIN64-NEXT: cvtsd2ss %xmm0, %xmm0
+; WIN64-NEXT: addq $40, %rsp
+; WIN64-NEXT: retq
+; WIN64-NEXT: .seh_endproc
+;
; WIN32-LABEL: ldexp_f32:
; WIN32: # %bb.0:
-; WIN32-NEXT: pushl %eax
-; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; WIN32-NEXT: cmpl $381, %ecx # imm = 0x17D
-; WIN32-NEXT: movl %ecx, %eax
-; WIN32-NEXT: jl LBB0_2
-; WIN32-NEXT: # %bb.1:
-; WIN32-NEXT: movl $381, %eax # imm = 0x17D
-; WIN32-NEXT: LBB0_2:
-; WIN32-NEXT: addl $-254, %eax
-; WIN32-NEXT: leal -127(%ecx), %edx
-; WIN32-NEXT: cmpl $255, %ecx
-; WIN32-NEXT: jae LBB0_4
-; WIN32-NEXT: # %bb.3:
-; WIN32-NEXT: movl %edx, %eax
-; WIN32-NEXT: LBB0_4:
-; WIN32-NEXT: flds __real@7f800000
-; WIN32-NEXT: flds __real@7f000000
-; WIN32-NEXT: jae LBB0_6
-; WIN32-NEXT: # %bb.5:
-; WIN32-NEXT: fstp %st(1)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: LBB0_6:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: cmpl $-329, %ecx # imm = 0xFEB7
-; WIN32-NEXT: movl %ecx, %edx
-; WIN32-NEXT: jge LBB0_8
-; WIN32-NEXT: # %bb.7:
-; WIN32-NEXT: movl $-330, %edx # imm = 0xFEB6
-; WIN32-NEXT: LBB0_8:
-; WIN32-NEXT: cmpl $-228, %ecx
-; WIN32-NEXT: fldz
-; WIN32-NEXT: flds __real@0c800000
-; WIN32-NEXT: jb LBB0_9
-; WIN32-NEXT: # %bb.10:
-; WIN32-NEXT: fstp %st(1)
-; WIN32-NEXT: leal 102(%ecx), %edx
-; WIN32-NEXT: cmpl $-126, %ecx
-; WIN32-NEXT: jge LBB0_12
-; WIN32-NEXT: jmp LBB0_13
-; WIN32-NEXT: LBB0_9:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: addl $204, %edx
-; WIN32-NEXT: cmpl $-126, %ecx
-; WIN32-NEXT: jl LBB0_13
-; WIN32-NEXT: LBB0_12:
-; WIN32-NEXT: movl %ecx, %edx
-; WIN32-NEXT: LBB0_13:
+; WIN32-NEXT: subl $16, %esp
+; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: fld1
-; WIN32-NEXT: jl LBB0_15
-; WIN32-NEXT: # %bb.14:
-; WIN32-NEXT: fstp %st(1)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: LBB0_15:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: cmpl $127, %ecx
-; WIN32-NEXT: jg LBB0_17
-; WIN32-NEXT: # %bb.16:
-; WIN32-NEXT: fstp %st(1)
-; WIN32-NEXT: movl %edx, %eax
-; WIN32-NEXT: fldz
-; WIN32-NEXT: LBB0_17:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: shll $23, %eax
-; WIN32-NEXT: addl $1065353216, %eax # imm = 0x3F800000
-; WIN32-NEXT: movl %eax, (%esp)
-; WIN32-NEXT: fmuls (%esp)
-; WIN32-NEXT: popl %eax
+; WIN32-NEXT: fstpl (%esp)
+; WIN32-NEXT: calll _ldexp
+; WIN32-NEXT: fstps {{[0-9]+}}(%esp)
+; WIN32-NEXT: flds {{[0-9]+}}(%esp)
+; WIN32-NEXT: addl $16, %esp
; WIN32-NEXT: retl
%zext = zext i8 %x to i32
%ldexp = call float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 %zext)
@@ -89,6 +45,12 @@ define double @ldexp_f64(i8 zeroext %x) {
; X64-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-NEXT: jmp ldexp@PLT # TAILCALL
;
+; WIN64-LABEL: ldexp_f64:
+; WIN64: # %bb.0:
+; WIN64-NEXT: movzbl %cl, %edx
+; WIN64-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
+; WIN64-NEXT: jmp ldexp # TAILCALL
+;
; WIN32-LABEL: ldexp_f64:
; WIN32: # %bb.0:
; WIN32-NEXT: subl $12, %esp
@@ -127,152 +89,60 @@ define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) {
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
+; WIN64-LABEL: ldexp_v2f32:
+; WIN64: # %bb.0:
+; WIN64-NEXT: pushq %rsi
+; WIN64-NEXT: .seh_pushreg %rsi
+; WIN64-NEXT: subq $64, %rsp
+; WIN64-NEXT: .seh_stackalloc 64
+; WIN64-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: .seh_savexmm %xmm7, 48
+; WIN64-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: .seh_savexmm %xmm6, 32
+; WIN64-NEXT: .seh_endprologue
+; WIN64-NEXT: movaps (%rcx), %xmm7
+; WIN64-NEXT: movl (%rdx), %eax
+; WIN64-NEXT: movl 4(%rdx), %esi
+; WIN64-NEXT: cvtss2sd %xmm7, %xmm0
+; WIN64-NEXT: movl %eax, %edx
+; WIN64-NEXT: callq ldexp
+; WIN64-NEXT: xorps %xmm6, %xmm6
+; WIN64-NEXT: cvtsd2ss %xmm0, %xmm6
+; WIN64-NEXT: shufps {{.*#+}} xmm7 = xmm7[1,1,1,1]
+; WIN64-NEXT: xorps %xmm0, %xmm0
+; WIN64-NEXT: cvtss2sd %xmm7, %xmm0
+; WIN64-NEXT: movl %esi, %edx
+; WIN64-NEXT: callq ldexp
+; WIN64-NEXT: cvtsd2ss %xmm0, %xmm0
+; WIN64-NEXT: unpcklps {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1]
+; WIN64-NEXT: movaps %xmm6, %xmm0
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
+; WIN64-NEXT: addq $64, %rsp
+; WIN64-NEXT: popq %rsi
+; WIN64-NEXT: retq
+; WIN64-NEXT: .seh_endproc
+;
; WIN32-LABEL: ldexp_v2f32:
; WIN32: # %bb.0:
-; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
-; WIN32-NEXT: subl $8, %esp
-; WIN32-NEXT: flds {{[0-9]+}}(%esp)
+; WIN32-NEXT: subl $20, %esp
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: cmpl $-329, %eax # imm = 0xFEB7
-; WIN32-NEXT: movl %eax, %edx
-; WIN32-NEXT: jge LBB2_2
-; WIN32-NEXT: # %bb.1:
-; WIN32-NEXT: movl $-330, %edx # imm = 0xFEB6
-; WIN32-NEXT: LBB2_2:
-; WIN32-NEXT: addl $204, %edx
-; WIN32-NEXT: leal 102(%eax), %ecx
-; WIN32-NEXT: cmpl $-228, %eax
-; WIN32-NEXT: jb LBB2_4
-; WIN32-NEXT: # %bb.3:
-; WIN32-NEXT: movl %ecx, %edx
-; WIN32-NEXT: LBB2_4:
-; WIN32-NEXT: flds __real@0c800000
-; WIN32-NEXT: fld %st(1)
-; WIN32-NEXT: fmul %st(1), %st
-; WIN32-NEXT: fld %st(0)
-; WIN32-NEXT: fmul %st(2), %st
-; WIN32-NEXT: jb LBB2_6
-; WIN32-NEXT: # %bb.5:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: fxch %st(1)
-; WIN32-NEXT: LBB2_6:
-; WIN32-NEXT: fstp %st(1)
-; WIN32-NEXT: cmpl $-126, %eax
-; WIN32-NEXT: jl LBB2_8
-; WIN32-NEXT: # %bb.7:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: fld %st(1)
-; WIN32-NEXT: movl %eax, %edx
-; WIN32-NEXT: LBB2_8:
-; WIN32-NEXT: cmpl $381, %eax # imm = 0x17D
-; WIN32-NEXT: movl %eax, %esi
-; WIN32-NEXT: jl LBB2_10
-; WIN32-NEXT: # %bb.9:
-; WIN32-NEXT: movl $381, %esi # imm = 0x17D
-; WIN32-NEXT: LBB2_10:
-; WIN32-NEXT: flds __real@7f000000
-; WIN32-NEXT: fmul %st, %st(3)
-; WIN32-NEXT: fld %st(3)
-; WIN32-NEXT: fmul %st(1), %st
-; WIN32-NEXT: leal -127(%eax), %ecx
-; WIN32-NEXT: cmpl $255, %eax
-; WIN32-NEXT: jae LBB2_11
-; WIN32-NEXT: # %bb.12:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: jmp LBB2_13
-; WIN32-NEXT: LBB2_11:
-; WIN32-NEXT: fstp %st(4)
-; WIN32-NEXT: addl $-254, %esi
-; WIN32-NEXT: movl %esi, %ecx
-; WIN32-NEXT: LBB2_13:
-; WIN32-NEXT: cmpl $127, %eax
+; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; WIN32-NEXT: jg LBB2_15
-; WIN32-NEXT: # %bb.14:
-; WIN32-NEXT: movl %edx, %ecx
-; WIN32-NEXT: LBB2_15:
-; WIN32-NEXT: cmpl $381, %esi # imm = 0x17D
-; WIN32-NEXT: movl %esi, %edx
-; WIN32-NEXT: jl LBB2_17
-; WIN32-NEXT: # %bb.16:
-; WIN32-NEXT: movl $381, %edx # imm = 0x17D
-; WIN32-NEXT: LBB2_17:
-; WIN32-NEXT: addl $-254, %edx
-; WIN32-NEXT: leal -127(%esi), %edi
-; WIN32-NEXT: cmpl $255, %esi
-; WIN32-NEXT: jae LBB2_19
-; WIN32-NEXT: # %bb.18:
-; WIN32-NEXT: movl %edi, %edx
-; WIN32-NEXT: LBB2_19:
-; WIN32-NEXT: fld %st(0)
-; WIN32-NEXT: fmul %st(2), %st
-; WIN32-NEXT: fmul %st, %st(2)
-; WIN32-NEXT: jae LBB2_21
-; WIN32-NEXT: # %bb.20:
-; WIN32-NEXT: fstp %st(2)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: LBB2_21:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: cmpl $-329, %esi # imm = 0xFEB7
-; WIN32-NEXT: movl %esi, %edi
-; WIN32-NEXT: jge LBB2_23
-; WIN32-NEXT: # %bb.22:
-; WIN32-NEXT: movl $-330, %edi # imm = 0xFEB6
-; WIN32-NEXT: LBB2_23:
-; WIN32-NEXT: fld %st(0)
-; WIN32-NEXT: fmul %st(4), %st
-; WIN32-NEXT: fmul %st, %st(4)
-; WIN32-NEXT: cmpl $-228, %esi
-; WIN32-NEXT: jb LBB2_24
-; WIN32-NEXT: # %bb.25:
-; WIN32-NEXT: fstp %st(4)
-; WIN32-NEXT: leal 102(%esi), %edi
-; WIN32-NEXT: cmpl $-126, %esi
-; WIN32-NEXT: jge LBB2_27
-; WIN32-NEXT: jmp LBB2_28
-; WIN32-NEXT: LBB2_24:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: addl $204, %edi
-; WIN32-NEXT: cmpl $-126, %esi
-; WIN32-NEXT: jl LBB2_28
-; WIN32-NEXT: LBB2_27:
-; WIN32-NEXT: fstp %st(3)
-; WIN32-NEXT: movl %esi, %edi
-; WIN32-NEXT: fldz
-; WIN32-NEXT: LBB2_28:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: cmpl $127, %esi
-; WIN32-NEXT: jg LBB2_30
-; WIN32-NEXT: # %bb.29:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: movl %edi, %edx
-; WIN32-NEXT: fldz
-; WIN32-NEXT: fxch %st(2)
-; WIN32-NEXT: LBB2_30:
-; WIN32-NEXT: fstp %st(2)
-; WIN32-NEXT: cmpl $127, %eax
-; WIN32-NEXT: jg LBB2_32
-; WIN32-NEXT: # %bb.31:
-; WIN32-NEXT: fstp %st(2)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: LBB2_32:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: shll $23, %ecx
-; WIN32-NEXT: addl $1065353216, %ecx # imm = 0x3F800000
-; WIN32-NEXT: movl %ecx, (%esp)
-; WIN32-NEXT: shll $23, %edx
-; WIN32-NEXT: addl $1065353216, %edx # imm = 0x3F800000
-; WIN32-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; WIN32-NEXT: fxch %st(1)
-; WIN32-NEXT: fmuls (%esp)
-; WIN32-NEXT: fxch %st(1)
-; WIN32-NEXT: fmuls {{[0-9]+}}(%esp)
-; WIN32-NEXT: addl $8, %esp
+; WIN32-NEXT: fstpl (%esp)
+; WIN32-NEXT: calll _ldexp
+; WIN32-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN32-NEXT: flds {{[0-9]+}}(%esp)
+; WIN32-NEXT: fstpl (%esp)
+; WIN32-NEXT: fstps {{[0-9]+}}(%esp)
+; WIN32-NEXT: calll _ldexp
+; WIN32-NEXT: fstps {{[0-9]+}}(%esp)
+; WIN32-NEXT: flds {{[0-9]+}}(%esp)
+; WIN32-NEXT: flds {{[0-9]+}}(%esp)
+; WIN32-NEXT: addl $20, %esp
; WIN32-NEXT: popl %esi
-; WIN32-NEXT: popl %edi
; WIN32-NEXT: retl
%1 = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %val, <2 x i32> %exp)
ret <2 x float> %1
@@ -319,335 +189,106 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) {
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
+; WIN64-LABEL: ldexp_v4f32:
+; WIN64: # %bb.0:
+; WIN64-NEXT: pushq %rsi
+; WIN64-NEXT: .seh_pushreg %rsi
+; WIN64-NEXT: subq $80, %rsp
+; WIN64-NEXT: .seh_stackalloc 80
+; WIN64-NEXT: movaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: .seh_savexmm %xmm8, 64
+; WIN64-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: .seh_savexmm %xmm7, 48
+; WIN64-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: .seh_savexmm %xmm6, 32
+; WIN64-NEXT: .seh_endprologue
+; WIN64-NEXT: movq %rdx, %rsi
+; WIN64-NEXT: movaps (%rcx), %xmm7
+; WIN64-NEXT: movl 12(%rdx), %edx
+; WIN64-NEXT: movaps %xmm7, %xmm0
+; WIN64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3],xmm7[3,3]
+; WIN64-NEXT: cvtss2sd %xmm0, %xmm0
+; WIN64-NEXT: callq ldexp
+; WIN64-NEXT: xorps %xmm6, %xmm6
+; WIN64-NEXT: cvtsd2ss %xmm0, %xmm6
+; WIN64-NEXT: movl 8(%rsi), %edx
+; WIN64-NEXT: movaps %xmm7, %xmm0
+; WIN64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm7[1]
+; WIN64-NEXT: cvtss2sd %xmm0, %xmm0
+; WIN64-NEXT: callq ldexp
+; WIN64-NEXT: xorps %xmm8, %xmm8
+; WIN64-NEXT: cvtsd2ss %xmm0, %xmm8
+; WIN64-NEXT: unpcklps {{.*#+}} xmm8 = xmm8[0],xmm6[0],xmm8[1],xmm6[1]
+; WIN64-NEXT: movl (%rsi), %edx
+; WIN64-NEXT: movl 4(%rsi), %esi
+; WIN64-NEXT: xorps %xmm0, %xmm0
+; WIN64-NEXT: cvtss2sd %xmm7, %xmm0
+; WIN64-NEXT: callq ldexp
+; WIN64-NEXT: xorps %xmm6, %xmm6
+; WIN64-NEXT: cvtsd2ss %xmm0, %xmm6
+; WIN64-NEXT: shufps {{.*#+}} xmm7 = xmm7[1,1,1,1]
+; WIN64-NEXT: xorps %xmm0, %xmm0
+; WIN64-NEXT: cvtss2sd %xmm7, %xmm0
+; WIN64-NEXT: movl %esi, %edx
+; WIN64-NEXT: callq ldexp
+; WIN64-NEXT: cvtsd2ss %xmm0, %xmm0
+; WIN64-NEXT: unpcklps {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1]
+; WIN64-NEXT: movlhps {{.*#+}} xmm6 = xmm6[0],xmm8[0]
+; WIN64-NEXT: movaps %xmm6, %xmm0
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload
+; WIN64-NEXT: addq $80, %rsp
+; WIN64-NEXT: popq %rsi
+; WIN64-NEXT: retq
+; WIN64-NEXT: .seh_endproc
+;
; WIN32-LABEL: ldexp_v4f32:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
-; WIN32-NEXT: subl $32, %esp
+; WIN32-NEXT: subl $44, %esp
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: fstpl (%esp)
+; WIN32-NEXT: calll _ldexp
+; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; WIN32-NEXT: movl %ebp, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
-; WIN32-NEXT: flds __real@7f000000
-; WIN32-NEXT: fld %st(1)
-; WIN32-NEXT: fmul %st(1), %st
-; WIN32-NEXT: fld %st(0)
-; WIN32-NEXT: fmul %st(2), %st
-; WIN32-NEXT: cmpl $255, %ecx
-; WIN32-NEXT: jae LBB3_2
-; WIN32-NEXT: # %bb.1:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: fxch %st(1)
-; WIN32-NEXT: LBB3_2:
-; WIN32-NEXT: fstp %st(1)
-; WIN32-NEXT: cmpl $-329, %ecx # imm = 0xFEB7
-; WIN32-NEXT: movl %ecx, %esi
-; WIN32-NEXT: jge LBB3_4
-; WIN32-NEXT: # %bb.3:
-; WIN32-NEXT: movl $-330, %esi # imm = 0xFEB6
-; WIN32-NEXT: LBB3_4:
-; WIN32-NEXT: addl $204, %esi
-; WIN32-NEXT: leal 102(%ecx), %eax
-; WIN32-NEXT: cmpl $-228, %ecx
-; WIN32-NEXT: jb LBB3_6
-; WIN32-NEXT: # %bb.5:
-; WIN32-NEXT: movl %eax, %esi
-; WIN32-NEXT: LBB3_6:
-; WIN32-NEXT: flds __real@0c800000
-; WIN32-NEXT: fld %st(3)
-; WIN32-NEXT: fmul %st(1), %st
-; WIN32-NEXT: fld %st(0)
-; WIN32-NEXT: fmul %st(2), %st
-; WIN32-NEXT: jb LBB3_8
-; WIN32-NEXT: # %bb.7:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: fxch %st(1)
-; WIN32-NEXT: LBB3_8:
-; WIN32-NEXT: fstp %st(1)
-; WIN32-NEXT: cmpl $-126, %ecx
-; WIN32-NEXT: jl LBB3_10
-; WIN32-NEXT: # %bb.9:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: fxch %st(4)
-; WIN32-NEXT: LBB3_10:
-; WIN32-NEXT: fstp %st(4)
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; WIN32-NEXT: movl %ecx, %edx
-; WIN32-NEXT: subl $127, %edx
-; WIN32-NEXT: jg LBB3_12
-; WIN32-NEXT: # %bb.11:
-; WIN32-NEXT: fstp %st(1)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: fxch %st(3)
-; WIN32-NEXT: fxch %st(1)
-; WIN32-NEXT: LBB3_12:
-; WIN32-NEXT: fstp %st(3)
-; WIN32-NEXT: fld %st(3)
-; WIN32-NEXT: fmul %st(2), %st
-; WIN32-NEXT: fld %st(0)
-; WIN32-NEXT: fmul %st(3), %st
-; WIN32-NEXT: cmpl $255, %edi
-; WIN32-NEXT: jae LBB3_14
-; WIN32-NEXT: # %bb.13:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: fxch %st(1)
-; WIN32-NEXT: LBB3_14:
-; WIN32-NEXT: fstp %st(1)
-; WIN32-NEXT: fxch %st(1)
-; WIN32-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; WIN32-NEXT: cmpl $-329, %edi # imm = 0xFEB7
-; WIN32-NEXT: movl %edi, %eax
-; WIN32-NEXT: jge LBB3_16
-; WIN32-NEXT: # %bb.15:
-; WIN32-NEXT: movl $-330, %eax # imm = 0xFEB6
-; WIN32-NEXT: LBB3_16:
-; WIN32-NEXT: fld %st(3)
-; WIN32-NEXT: fmul %st(3), %st
-; WIN32-NEXT: fld %st(0)
-; WIN32-NEXT: fmul %st(4), %st
-; WIN32-NEXT: cmpl $-228, %edi
-; WIN32-NEXT: jb LBB3_17
-; WIN32-NEXT: # %bb.18:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: leal 102(%edi), %eax
-; WIN32-NEXT: cmpl $-126, %edi
-; WIN32-NEXT: jge LBB3_20
-; WIN32-NEXT: jmp LBB3_21
-; WIN32-NEXT: LBB3_17:
-; WIN32-NEXT: fstp %st(1)
-; WIN32-NEXT: addl $204, %eax
-; WIN32-NEXT: cmpl $-126, %edi
-; WIN32-NEXT: jl LBB3_21
-; WIN32-NEXT: LBB3_20:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: movl %edi, %eax
-; WIN32-NEXT: fldz
-; WIN32-NEXT: fxch %st(4)
-; WIN32-NEXT: LBB3_21:
-; WIN32-NEXT: fstp %st(4)
-; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; WIN32-NEXT: movl %edi, %ebx
-; WIN32-NEXT: subl $127, %ebx
-; WIN32-NEXT: jg LBB3_23
-; WIN32-NEXT: # %bb.22:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: fxch %st(3)
-; WIN32-NEXT: LBB3_23:
-; WIN32-NEXT: fstp %st(3)
-; WIN32-NEXT: cmpl $381, %edi # imm = 0x17D
-; WIN32-NEXT: movl %edi, %eax
-; WIN32-NEXT: jge LBB3_24
-; WIN32-NEXT: # %bb.25:
-; WIN32-NEXT: cmpl $255, %edi
-; WIN32-NEXT: jae LBB3_26
-; WIN32-NEXT: LBB3_27:
-; WIN32-NEXT: cmpl $-126, %ecx
-; WIN32-NEXT: jl LBB3_29
-; WIN32-NEXT: LBB3_28:
-; WIN32-NEXT: movl %ecx, %esi
-; WIN32-NEXT: LBB3_29:
-; WIN32-NEXT: cmpl $381, %ecx # imm = 0x17D
-; WIN32-NEXT: movl %ecx, %eax
-; WIN32-NEXT: jl LBB3_31
-; WIN32-NEXT: # %bb.30:
-; WIN32-NEXT: movl $381, %eax # imm = 0x17D
-; WIN32-NEXT: LBB3_31:
-; WIN32-NEXT: cmpl $255, %ecx
+; WIN32-NEXT: fstpl (%esp)
+; WIN32-NEXT: calll _ldexp
+; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; WIN32-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; WIN32-NEXT: jb LBB3_33
-; WIN32-NEXT: # %bb.32:
-; WIN32-NEXT: addl $-254, %eax
-; WIN32-NEXT: movl %eax, %edx
-; WIN32-NEXT: LBB3_33:
-; WIN32-NEXT: fxc...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you instead set the action to promote to avoid repeating the extend + round sequence?
Updated the patch to mark the operation as "promote". I don't know of any way to do that cross-target, so this is now done per-target; I'm not sure if this is what you expected.
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I find the weird base class handling of same-os-different-target strange to begin with
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/30/builds/716 Here is the relevant piece of the build log for reference:
|
This reduces codesize. As discussed in llvm#92707.
This reduces codesize. As discussed in #92707.