Skip to content

Commit c80c09f

Browse files
committed
[CalcSpillWeights] Avoid x87 excess precision influencing weight result
Fixes #99396. The result of `VirtRegAuxInfo::weightCalcHelper` can be influenced by x87 excess precision, which can result in slightly different register choices depending on whether the compiler is hosted on x86_64 or i386. This leads to different object file output when cross-compiling to i386 than when compiling natively. Similar to 7af3432, we need to add a `volatile` qualifier to the local `Weight` variable to force it onto the stack and avoid the excess precision. Define `stack_float_t` in `MathExtras.h` for this purpose, and use it.
1 parent 361d4cf commit c80c09f

File tree

3 files changed

+70
-5
lines changed

3 files changed

+70
-5
lines changed

llvm/include/llvm/Support/MathExtras.h

+8
Original file line numberDiff line numberDiff line change
@@ -770,6 +770,14 @@ std::enable_if_t<std::is_signed_v<T>, T> MulOverflow(T X, T Y, T &Result) {
770770
#endif
771771
}
772772

773+
/// Type to force floating-point values onto the stack, so that x86 doesn't add
774+
/// hidden precision, avoiding rounding differences on various platforms.
775+
#if defined(__i386__) || defined(_M_IX86)
776+
using stack_float_t = volatile float;
777+
#else
778+
using stack_float_t = float;
779+
#endif
780+
773781
} // namespace llvm
774782

775783
#endif

llvm/lib/CodeGen/CalcSpillWeights.cpp

+6-5
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "llvm/CodeGen/TargetSubtargetInfo.h"
2323
#include "llvm/CodeGen/VirtRegMap.h"
2424
#include "llvm/Support/Debug.h"
25+
#include "llvm/Support/MathExtras.h"
2526
#include "llvm/Support/raw_ostream.h"
2627
#include <cassert>
2728
#include <tuple>
@@ -257,7 +258,9 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
257258
return -1.0f;
258259
}
259260

260-
float Weight = 1.0f;
261+
// Force Weight onto the stack so that x86 doesn't add hidden precision,
262+
// similar to HWeight below.
263+
stack_float_t Weight = 1.0f;
261264
if (IsSpillable) {
262265
// Get loop info for mi.
263266
if (MI->getParent() != MBB) {
@@ -284,11 +287,9 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
284287
Register HintReg = copyHint(MI, LI.reg(), TRI, MRI);
285288
if (!HintReg)
286289
continue;
287-
// Force hweight onto the stack so that x86 doesn't add hidden precision,
290+
// Force HWeight onto the stack so that x86 doesn't add hidden precision,
288291
// making the comparison incorrectly pass (i.e., 1 > 1 == true??).
289-
//
290-
// FIXME: we probably shouldn't use floats at all.
291-
volatile float HWeight = Hint[HintReg] += Weight;
292+
stack_float_t HWeight = Hint[HintReg] += Weight;
292293
if (HintReg.isVirtual() || MRI.isAllocatable(HintReg))
293294
CopyHints.insert(CopyHint(HintReg, HWeight));
294295
}

llvm/test/CodeGen/X86/pr99396.ll

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
; RUN: llc < %s -mtriple=i386-unknown-freebsd -enable-misched -relocation-model=pic | FileCheck %s
2+
3+
@c = external local_unnamed_addr global ptr
4+
5+
declare i32 @fn2() local_unnamed_addr
6+
7+
declare i32 @fn3() local_unnamed_addr
8+
9+
define noundef i32 @fn4() #0 {
10+
entry:
11+
%tmp0 = load i32, ptr @fn4, align 4
12+
; CHECK: movl fn4@GOT(%ebx), %edi
13+
; CHECK-NEXT: movl (%edi), %edx
14+
%tmp1 = load ptr, ptr @c, align 4
15+
; CHECK: movl c@GOT(%ebx), %eax
16+
; CHECK-NEXT: movl (%eax), %esi
17+
; CHECK-NEXT: testl %esi, %esi
18+
%cmp.g = icmp eq ptr %tmp1, null
19+
br i1 %cmp.g, label %if.then.g, label %if.end3.g
20+
21+
if.then.g: ; preds = %entry
22+
%tmp2 = load i32, ptr inttoptr (i32 1 to ptr), align 4
23+
%cmp1.g = icmp slt i32 %tmp2, 0
24+
br i1 %cmp1.g, label %if.then2.g, label %if.end3.g
25+
26+
if.then2.g: ; preds = %if.then.g
27+
%.g = load volatile i32, ptr null, align 2147483648
28+
br label %f.exit
29+
30+
if.end3.g: ; preds = %if.then.g, %entry
31+
%h.i.g = icmp eq i32 %tmp0, 0
32+
br i1 %h.i.g, label %f.exit, label %while.body.g
33+
34+
while.body.g: ; preds = %if.end3.g, %if.end8.g
35+
%buff.addr.019.g = phi ptr [ %incdec.ptr.g, %if.end8.g ], [ @fn4, %if.end3.g ]
36+
%g.addr.018.g = phi i32 [ %dec.g, %if.end8.g ], [ %tmp0, %if.end3.g ]
37+
%call4.g = tail call i32 @fn3(ptr %tmp1, ptr %buff.addr.019.g, i32 %g.addr.018.g)
38+
%cmp5.g = icmp slt i32 %call4.g, 0
39+
br i1 %cmp5.g, label %if.then6.g, label %if.end8.g
40+
41+
if.then6.g: ; preds = %while.body.g
42+
%call7.g = tail call i32 @fn2(ptr null)
43+
br label %f.exit
44+
45+
if.end8.g: ; preds = %while.body.g
46+
%dec.g = add i32 %g.addr.018.g, 1
47+
%incdec.ptr.g = getelementptr i32, ptr %buff.addr.019.g, i32 1
48+
store i64 0, ptr %tmp1, align 4
49+
%h.not.g = icmp eq i32 %dec.g, 0
50+
br i1 %h.not.g, label %f.exit, label %while.body.g
51+
52+
f.exit: ; preds = %if.end8.g, %if.then6.g, %if.end3.g, %if.then2.g
53+
ret i32 0
54+
}
55+
56+
attributes #0 = { "frame-pointer"="all" "tune-cpu"="generic" }

0 commit comments

Comments
 (0)