From 566e8e12e1050db92f9da94a2a531b2880121271 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 24 Jan 2025 05:02:47 +0000
Subject: [PATCH 1/2] Add a generic version of `fmod`

This can replace `fmod` and `fmodf`. As part of this change I was able
to replace some of the `while` loops with `leading_zeros`.
---
 etc/function-definitions.json  |  6 ++-
 src/math/fmod.rs               | 77 +-----------------------------
 src/math/fmodf.rs              | 87 +---------------------------------
 src/math/generic/fmod.rs       | 84 ++++++++++++++++++++++++++++++++
 src/math/generic/mod.rs        |  2 +
 src/math/support/int_traits.rs |  2 +
 6 files changed, 96 insertions(+), 162 deletions(-)
 create mode 100644 src/math/generic/fmod.rs

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index b6653295c..866e9a439 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -437,13 +437,15 @@
     "fmod": {
         "sources": [
             "src/libm_helper.rs",
-            "src/math/fmod.rs"
+            "src/math/fmod.rs",
+            "src/math/generic/fmod.rs"
         ],
         "type": "f64"
     },
     "fmodf": {
         "sources": [
-            "src/math/fmodf.rs"
+            "src/math/fmodf.rs",
+            "src/math/generic/fmod.rs"
         ],
         "type": "f32"
     },
diff --git a/src/math/fmod.rs b/src/math/fmod.rs
index b68e6b0ea..d9786b53d 100644
--- a/src/math/fmod.rs
+++ b/src/math/fmod.rs
@@ -1,78 +1,5 @@
+/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmod(x: f64, y: f64) -> f64 {
-    let mut uxi = x.to_bits();
-    let mut uyi = y.to_bits();
-    let mut ex = ((uxi >> 52) & 0x7ff) as i64;
-    let mut ey = ((uyi >> 52) & 0x7ff) as i64;
-    let sx = uxi >> 63;
-    let mut i;
-
-    if uyi << 1 == 0 || y.is_nan() || ex == 0x7ff {
-        return (x * y) / (x * y);
-    }
-    if uxi << 1 <= uyi << 1 {
-        if uxi << 1 == uyi << 1 {
-            return 0.0 * x;
-        }
-        return x;
-    }
-
-    /* normalize x and y */
-    if ex == 0 {
-        i = uxi << 12;
-        while i >> 63 == 0 {
-            ex -= 1;
-            i <<= 1;
-        }
-        uxi <<= -ex + 1;
-    } else {
-        uxi &= u64::MAX >> 12;
-        uxi |= 1 << 52;
-    }
-    if ey == 0 {
-        i = uyi << 12;
-        while i >> 63 == 0 {
-            ey -= 1;
-            i <<= 1;
-        }
-        uyi <<= -ey + 1;
-    } else {
-        uyi &= u64::MAX >> 12;
-        uyi |= 1 << 52;
-    }
-
-    /* x mod y */
-    while ex > ey {
-        i = uxi.wrapping_sub(uyi);
-        if i >> 63 == 0 {
-            if i == 0 {
-                return 0.0 * x;
-            }
-            uxi = i;
-        }
-        uxi <<= 1;
-        ex -= 1;
-    }
-    i = uxi.wrapping_sub(uyi);
-    if i >> 63 == 0 {
-        if i == 0 {
-            return 0.0 * x;
-        }
-        uxi = i;
-    }
-    while uxi >> 52 == 0 {
-        uxi <<= 1;
-        ex -= 1;
-    }
-
-    /* scale result */
-    if ex > 0 {
-        uxi -= 1 << 52;
-        uxi |= (ex as u64) << 52;
-    } else {
-        uxi >>= -ex + 1;
-    }
-    uxi |= sx << 63;
-
-    f64::from_bits(uxi)
+    super::generic::fmod(x, y)
 }
diff --git a/src/math/fmodf.rs b/src/math/fmodf.rs
index 4de181957..4e95696e2 100644
--- a/src/math/fmodf.rs
+++ b/src/math/fmodf.rs
@@ -1,88 +1,5 @@
-use core::f32;
-
+/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmodf(x: f32, y: f32) -> f32 {
-    let mut uxi = x.to_bits();
-    let mut uyi = y.to_bits();
-    let mut ex = ((uxi >> 23) & 0xff) as i32;
-    let mut ey = ((uyi >> 23) & 0xff) as i32;
-    let sx = uxi & 0x80000000;
-    let mut i;
-
-    if uyi << 1 == 0 || y.is_nan() || ex == 0xff {
-        return (x * y) / (x * y);
-    }
-
-    if uxi << 1 <= uyi << 1 {
-        if uxi << 1 == uyi << 1 {
-            return 0.0 * x;
-        }
-
-        return x;
-    }
-
-    /* normalize x and y */
-    if ex == 0 {
-        i = uxi << 9;
-        while i >> 31 == 0 {
-            ex -= 1;
-            i <<= 1;
-        }
-
-        uxi <<= -ex + 1;
-    } else {
-        uxi &= u32::MAX >> 9;
-        uxi |= 1 << 23;
-    }
-
-    if ey == 0 {
-        i = uyi << 9;
-        while i >> 31 == 0 {
-            ey -= 1;
-            i <<= 1;
-        }
-
-        uyi <<= -ey + 1;
-    } else {
-        uyi &= u32::MAX >> 9;
-        uyi |= 1 << 23;
-    }
-
-    /* x mod y */
-    while ex > ey {
-        i = uxi.wrapping_sub(uyi);
-        if i >> 31 == 0 {
-            if i == 0 {
-                return 0.0 * x;
-            }
-            uxi = i;
-        }
-        uxi <<= 1;
-
-        ex -= 1;
-    }
-
-    i = uxi.wrapping_sub(uyi);
-    if i >> 31 == 0 {
-        if i == 0 {
-            return 0.0 * x;
-        }
-        uxi = i;
-    }
-
-    while uxi >> 23 == 0 {
-        uxi <<= 1;
-        ex -= 1;
-    }
-
-    /* scale result up */
-    if ex > 0 {
-        uxi -= 1 << 23;
-        uxi |= (ex as u32) << 23;
-    } else {
-        uxi >>= -ex + 1;
-    }
-    uxi |= sx;
-
-    f32::from_bits(uxi)
+    super::generic::fmod(x, y)
 }
diff --git a/src/math/generic/fmod.rs b/src/math/generic/fmod.rs
new file mode 100644
index 000000000..93da6c51e
--- /dev/null
+++ b/src/math/generic/fmod.rs
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: musl src/math/fmod.c. Ported to generic Rust algorithm in 2025, TG. */
+
+use super::super::{CastFrom, Float, Int, MinInt};
+
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmod<F: Float>(x: F, y: F) -> F {
+    let zero = F::Int::ZERO;
+    let one = F::Int::ONE;
+    let mut ix = x.to_bits();
+    let mut iy = y.to_bits();
+    let mut ex = x.exp().signed();
+    let mut ey = y.exp().signed();
+    let sx = ix & F::SIGN_MASK;
+
+    if iy << 1 == zero || y.is_nan() || ex == F::EXP_MAX as i32 {
+        return (x * y) / (x * y);
+    }
+
+    if ix << 1 <= iy << 1 {
+        if ix << 1 == iy << 1 {
+            return F::ZERO * x;
+        }
+        return x;
+    }
+
+    /* normalize x and y */
+    if ex == 0 {
+        let i = ix << F::EXP_BITS;
+        ex -= i.leading_zeros() as i32;
+        ix <<= -ex + 1;
+    } else {
+        ix &= F::Int::MAX >> F::EXP_BITS;
+        ix |= one << F::SIG_BITS;
+    }
+
+    if ey == 0 {
+        let i = iy << F::EXP_BITS;
+        ey -= i.leading_zeros() as i32;
+        iy <<= -ey + 1;
+    } else {
+        iy &= F::Int::MAX >> F::EXP_BITS;
+        iy |= one << F::SIG_BITS;
+    }
+
+    /* x mod y */
+    while ex > ey {
+        let i = ix.wrapping_sub(iy);
+        if i >> (F::BITS - 1) == zero {
+            if i == zero {
+                return F::ZERO * x;
+            }
+            ix = i;
+        }
+
+        ix <<= 1;
+        ex -= 1;
+    }
+
+    let i = ix.wrapping_sub(iy);
+    if i >> (F::BITS - 1) == zero {
+        if i == zero {
+            return F::ZERO * x;
+        }
+
+        ix = i;
+    }
+
+    let shift = ix.leading_zeros().saturating_sub(F::EXP_BITS);
+    ix <<= shift;
+    ex -= shift as i32;
+
+    /* scale result */
+    if ex > 0 {
+        ix -= one << F::SIG_BITS;
+        ix |= F::Int::cast_from(ex) << F::SIG_BITS;
+    } else {
+        ix >>= -ex + 1;
+    }
+
+    ix |= sx;
+
+    F::from_bits(ix)
+}
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index 819781a21..68686b0b2 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -5,6 +5,7 @@ mod fdim;
 mod floor;
 mod fmax;
 mod fmin;
+mod fmod;
 mod rint;
 mod round;
 mod scalbn;
@@ -18,6 +19,7 @@ pub use fdim::fdim;
 pub use floor::floor;
 pub use fmax::fmax;
 pub use fmin::fmin;
+pub use fmod::fmod;
 pub use rint::rint;
 pub use round::round;
 pub use scalbn::scalbn;
diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
index cf19762e8..b403c658c 100644
--- a/src/math/support/int_traits.rs
+++ b/src/math/support/int_traits.rs
@@ -45,7 +45,9 @@ pub trait Int:
     + ops::BitOrAssign
     + ops::BitXorAssign
     + ops::ShlAssign<i32>
+    + ops::ShlAssign<u32>
     + ops::ShrAssign<u32>
+    + ops::ShrAssign<i32>
     + ops::Add<Output = Self>
     + ops::Sub<Output = Self>
     + ops::Mul<Output = Self>

From 856e314b04f2c1ce9cd9b2587402ea7106f91f01 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 24 Jan 2025 05:09:08 +0000
Subject: [PATCH 2/2] Add `fmodf16` using the generic implementation

---
 crates/libm-macros/src/shared.rs             |  2 +-
 crates/libm-test/benches/icount.rs           |  1 +
 crates/libm-test/benches/random.rs           |  1 +
 crates/libm-test/src/mpfloat.rs              | 17 +++++++++++++++++
 crates/libm-test/tests/compare_built_musl.rs |  1 +
 crates/util/src/main.rs                      |  1 +
 etc/function-definitions.json                |  7 +++++++
 etc/function-list.txt                        |  1 +
 src/math/fmodf16.rs                          |  5 +++++
 src/math/mod.rs                              |  2 ++
 10 files changed, 37 insertions(+), 1 deletion(-)
 create mode 100644 src/math/fmodf16.rs

diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index fbe0702a6..69fe45e03 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -47,7 +47,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F16,
         Signature { args: &[Ty::F16, Ty::F16], returns: &[Ty::F16] },
         None,
-        &["copysignf16", "fdimf16", "fmaxf16", "fminf16"],
+        &["copysignf16", "fdimf16", "fmaxf16", "fminf16", "fmodf16"],
     ),
     (
         // `(f32, f32) -> f32`
diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
index 84f953262..97e78d8f1 100644
--- a/crates/libm-test/benches/icount.rs
+++ b/crates/libm-test/benches/icount.rs
@@ -111,6 +111,7 @@ main!(
     icount_bench_fmin_group,
     icount_bench_fminf_group,
     icount_bench_fmod_group,
+    icount_bench_fmodf16_group,
     icount_bench_fmodf_group,
     icount_bench_frexp_group,
     icount_bench_frexpf_group,
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index aac8379fd..3e816e81a 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -131,6 +131,7 @@ libm_macros::for_each_function! {
         | fmaxf16
         | fminf128
         | fminf16
+        | fmodf16
         | rintf128
         | rintf16
         | roundf128
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index da674c162..56234b14a 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -152,6 +152,7 @@ libm_macros::for_each_function! {
         floorf16,
         fmod,
         fmodf,
+        fmodf16,
         frexp,
         frexpf,
         ilogb,
@@ -525,6 +526,22 @@ impl MpOp for crate::op::lgammaf_r::Routine {
     }
 }
 
+// No fmodf128 yet
+impl MpOp for crate::op::fmodf16::Routine {
+    type MpTy = (MpFloat, MpFloat);
+
+    fn new_mp() -> Self::MpTy {
+        (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+    }
+
+    fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+        this.0.assign(input.0);
+        this.1.assign(input.1);
+        let ord = this.0.rem_assign_round(&this.1, Nearest);
+        prep_retval::<Self::RustRet>(&mut this.0, ord)
+    }
+}
+
 /* stub implementations so we don't need to special case them */
 
 impl MpOp for crate::op::nextafter::Routine {
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index ca070e8f6..46474c046 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -93,6 +93,7 @@ libm_macros::for_each_function! {
         fmaxf16,
         fminf128,
         fminf16,
+        fmodf16,
         rintf128,
         rintf16,
         roundf128,
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index eb8e37589..999b03af9 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -100,6 +100,7 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
             | fmaxf16
             | fminf128
             | fminf16
+            | fmodf16
             | rintf128
             | rintf16
             | roundf128
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 866e9a439..966060f77 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -449,6 +449,13 @@
         ],
         "type": "f32"
     },
+    "fmodf16": {
+        "sources": [
+            "src/math/fmodf16.rs",
+            "src/math/generic/fmod.rs"
+        ],
+        "type": "f16"
+    },
     "frexp": {
         "sources": [
             "src/libm_helper.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
index 25b92e58b..ff4de0cb5 100644
--- a/etc/function-list.txt
+++ b/etc/function-list.txt
@@ -63,6 +63,7 @@ fminf128
 fminf16
 fmod
 fmodf
+fmodf16
 frexp
 frexpf
 hypot
diff --git a/src/math/fmodf16.rs b/src/math/fmodf16.rs
new file mode 100644
index 000000000..11972a7de
--- /dev/null
+++ b/src/math/fmodf16.rs
@@ -0,0 +1,5 @@
+/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmodf16(x: f16, y: f16) -> f16 {
+    super::generic::fmod(x, y)
+}
diff --git a/src/math/mod.rs b/src/math/mod.rs
index cb83b2587..aab551bed 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -348,6 +348,7 @@ cfg_if! {
         mod floorf16;
         mod fmaxf16;
         mod fminf16;
+        mod fmodf16;
         mod rintf16;
         mod roundf16;
         mod sqrtf16;
@@ -360,6 +361,7 @@ cfg_if! {
         pub use self::floorf16::floorf16;
         pub use self::fmaxf16::fmaxf16;
         pub use self::fminf16::fminf16;
+        pub use self::fmodf16::fmodf16;
         pub use self::rintf16::rintf16;
         pub use self::roundf16::roundf16;
         pub use self::sqrtf16::sqrtf16;