Skip to content

Commit a6e3027

Browse files
icedrocket, topperc
icedrocket
authored and committed
[X86] Avoid converting u64 to f32 using x87 on Windows
The code below currently prints less accurate values only on Windows 32-bit. On Windows, the default precision control on x87 is only 53-bit, and FADD triggers rounding with that precision, so the final result may be less accurate. This revision avoids less accurate conversions by using library calls instead.

```
int main() {
  int64_t n = 0b0000000000111111111111111111111111011111111111111111111111111111;
  printf("%lld, %.0f, %.0f", n, (float)n, (float)(uint64_t)n);
  return 0;
}
```

Reviewed By: craig.topper, lebedev.ri

Differential Revision: https://reviews.llvm.org/D141074
1 parent 83d56fb commit a6e3027

File tree

2 files changed

+41
-2
lines changed

2 files changed

+41
-2
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -21898,6 +21898,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
2189821898
}
2189921899

2190021900
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
21901+
21902+
// On Windows, the default precision control on x87 is only 53-bit, and FADD
21903+
// triggers rounding with that precision, so the final result may be less
21904+
// accurate. 18014397972611071 is one such case.
21905+
if (Subtarget.isOSWindows())
21906+
return SDValue();
21907+
2190121908
SDValue ValueToStore = Src;
2190221909
if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit()) {
2190321910
// Bitcasting to f64 here allows us to do a single 64-bit store from

llvm/test/CodeGen/X86/uint64-to-float.ll

+34-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=i686-apple-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
3-
; RUN: llc < %s -mtriple=x86_64-apple-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
2+
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
3+
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
4+
; RUN: llc < %s -mtriple=i686-windows -mattr=+sse2 | FileCheck %s --check-prefix=X86-WIN
5+
; RUN: llc < %s -mtriple=x86_64-windows -mattr=+sse2 | FileCheck %s --check-prefix=X64-WIN
46

57
; Verify that we are using the efficient uitofp --> sitofp lowering illustrated
68
; by the compiler_rt implementation of __floatundisf.
@@ -42,6 +44,36 @@ define float @test(i64 %a) nounwind {
4244
; X64-NEXT: cvtsi2ss %rdi, %xmm0
4345
; X64-NEXT: addss %xmm0, %xmm0
4446
; X64-NEXT: retq
47+
;
48+
; X86-WIN-LABEL: test:
49+
; X86-WIN: # %bb.0: # %entry
50+
; X86-WIN-NEXT: pushl %ebp
51+
; X86-WIN-NEXT: movl %esp, %ebp
52+
; X86-WIN-NEXT: andl $-8, %esp
53+
; X86-WIN-NEXT: subl $8, %esp
54+
; X86-WIN-NEXT: pushl 12(%ebp)
55+
; X86-WIN-NEXT: pushl 8(%ebp)
56+
; X86-WIN-NEXT: calll ___floatundisf
57+
; X86-WIN-NEXT: addl $8, %esp
58+
; X86-WIN-NEXT: movl %ebp, %esp
59+
; X86-WIN-NEXT: popl %ebp
60+
; X86-WIN-NEXT: retl
61+
;
62+
; X64-WIN-LABEL: test:
63+
; X64-WIN: # %bb.0: # %entry
64+
; X64-WIN-NEXT: testq %rcx, %rcx
65+
; X64-WIN-NEXT: js .LBB0_1
66+
; X64-WIN-NEXT: # %bb.2: # %entry
67+
; X64-WIN-NEXT: cvtsi2ss %rcx, %xmm0
68+
; X64-WIN-NEXT: retq
69+
; X64-WIN-NEXT: .LBB0_1:
70+
; X64-WIN-NEXT: movq %rcx, %rax
71+
; X64-WIN-NEXT: shrq %rax
72+
; X64-WIN-NEXT: andl $1, %ecx
73+
; X64-WIN-NEXT: orq %rax, %rcx
74+
; X64-WIN-NEXT: cvtsi2ss %rcx, %xmm0
75+
; X64-WIN-NEXT: addss %xmm0, %xmm0
76+
; X64-WIN-NEXT: retq
4577
entry:
4678
%b = uitofp i64 %a to float
4779
ret float %b

0 commit comments

Comments
 (0)