Merge pull request #637 from beetrees/fix-float-mul

Amanieu · web-flow · commit c1c8fcc3265d · 2024-07-05T22:17:22.000+02:00
Fix incorrect rounding with subnormal/zero results of float multiplication
diff --git a/src/float/mul.rs b/src/float/mul.rs
@@ -149,18 +149,13 @@ where
         }
 
         // Otherwise, shift the significand of the result so that the round
-        // bit is the high bit of productLo.
-        if shift < bits {
-            let sticky = product_low << (bits - shift);
-            product_low = product_high << (bits - shift) | product_low >> shift | sticky;
-            product_high >>= shift;
-        } else if shift < (2 * bits) {
-            let sticky = product_high << (2 * bits - shift) | product_low;
-            product_low = product_high >> (shift - bits) | sticky;
-            product_high = zero;
-        } else {
-            product_high = zero;
-        }
+        // bit is the high bit of `product_low`.
+        // Ensure one of the non-highest bits in `product_low` is set if the shifted out bit are
+        // not all zero so that the result is correctly rounded below.
+        let sticky = product_low << (bits - shift) != zero;
+        product_low =
+            product_high << (bits - shift) | product_low >> shift | (sticky as u32).cast();
+        product_high >>= shift;
     } else {
         // Result is normal before rounding; insert the exponent.
         product_high &= significand_mask;
diff --git a/src/probestack.rs b/src/probestack.rs
@@ -120,6 +120,10 @@ macro_rules! define_rust_probestack {
     };
 }
 
+// FIXME(rust-lang/rust#126984): Remove allow once lint is fixed
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+use define_rust_probestack;
+
 // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
 // ensuring that if any pages are unmapped we'll make a page fault.
 //
diff --git a/testcrate/src/bench.rs b/testcrate/src/bench.rs
@@ -17,9 +17,10 @@ pub fn skip_sys_checks(test_name: &str) -> bool {
         "extend_f16_f32",
         "trunc_f32_f16",
         "trunc_f64_f16",
-        // FIXME(f16_f128): rounding error
+        // FIXME(#616): re-enable once fix is in nightly
         // <https://github.com/rust-lang/compiler-builtins/issues/616>
-        "mul_f128",
+        "mul_f32",
+        "mul_f64",
     ];
 
     // FIXME(f16_f128): error on LE ppc64. There are more tests that are cfg-ed out completely
@@ -29,7 +30,13 @@ pub fn skip_sys_checks(test_name: &str) -> bool {
 
     // FIXME(f16_f128): system symbols have incorrect results
     // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
-    const X86_NO_SSE_SKIPPED: &[&str] = &["add_f128", "sub_f128", "powi_f32", "powi_f64"];
+    const X86_NO_SSE_SKIPPED: &[&str] =
+        &["add_f128", "sub_f128", "mul_f128", "powi_f32", "powi_f64"];
+
+    // FIXME(f16_f128): Wide multiply carry bug in `compiler-rt`, re-enable when nightly no longer
+    // uses `compiler-rt` version.
+    // <https://github.com/llvm/llvm-project/issues/91840>
+    const AARCH64_SKIPPED: &[&str] = &["mul_f128"];
 
     // FIXME(llvm): system symbols have incorrect results on Windows
     // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2121359807>
@@ -61,6 +68,10 @@ pub fn skip_sys_checks(test_name: &str) -> bool {
         return true;
     }
 
+    if cfg!(target_arch = "aarch64") && AARCH64_SKIPPED.contains(&test_name) {
+        return true;
+    }
+
     if cfg!(target_family = "windows") && WINDOWS_SKIPPED.contains(&test_name) {
         return true;
     }
diff --git a/testcrate/tests/mul.rs b/testcrate/tests/mul.rs
@@ -107,14 +107,11 @@ macro_rules! float_mul {
                 fuzz_float_2(N, |x: $f, y: $f| {
                     let mul0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Mul::mul, x, y);
                     let mul1: $f = $fn(x, y);
-                    // multiplication of subnormals is not currently handled
-                    if !(Float::is_subnormal(mul0) || Float::is_subnormal(mul1)) {
-                        if !Float::eq_repr(mul0, mul1) {
-                            panic!(
-                                "{}({:?}, {:?}): std: {:?}, builtins: {:?}",
-                                stringify!($fn), x, y, mul0, mul1
-                            );
-                        }
+                    if !Float::eq_repr(mul0, mul1) {
+                        panic!(
+                            "{}({:?}, {:?}): std: {:?}, builtins: {:?}",
+                            stringify!($fn), x, y, mul0, mul1
+                        );
                     }
                 });
             }
@@ -126,9 +123,11 @@ macro_rules! float_mul {
 mod float_mul {
     use super::*;
 
+    // FIXME(#616): Stop ignoring arches that don't have native support once fix for builtins is in
+    // nightly.
     float_mul! {
-        f32, __mulsf3, Single, all();
-        f64, __muldf3, Double, all();
+        f32, __mulsf3, Single, not(target_arch = "arm");
+        f64, __muldf3, Double, not(target_arch = "arm");
     }
 }