From 857386c073473c4b3ca9217d1c8a132e0b88d697 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Wed, 29 Sep 2021 22:54:04 -0400
Subject: [PATCH 1/4] wip: consolidate Cython fused types in _libs/dtypes.pxd

---
 pandas/_libs/algos.pxd               |   4 +-
 pandas/_libs/algos.pyx               | 121 +++++++++++-------------
 pandas/_libs/dtypes.pxd              |  27 ++++++
 pandas/_libs/groupby.pyx             | 133 +++++++++++++--------------
 pandas/_libs/join.pyx                | 101 +++++++-------------
 pandas/_libs/reshape.pyx             |  28 +-----
 pandas/_libs/util.pxd                |  15 ---
 pandas/_libs/window/aggregations.pyx |  26 +++---
 8 files changed, 196 insertions(+), 259 deletions(-)

diff --git a/pandas/_libs/algos.pxd b/pandas/_libs/algos.pxd
index 4f7cc9345ed30..fdeff2ed11805 100644
--- a/pandas/_libs/algos.pxd
+++ b/pandas/_libs/algos.pxd
@@ -1,7 +1,7 @@
-from pandas._libs.util cimport numeric
+from pandas._libs.dtypes cimport numeric_t
 
 
-cdef numeric kth_smallest_c(numeric* arr, Py_ssize_t k, Py_ssize_t n) nogil
+cdef numeric_t kth_smallest_c(numeric_t* arr, Py_ssize_t k, Py_ssize_t n) nogil
 
 cdef enum TiebreakEnumType:
     TIEBREAK_AVERAGE
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 22e2abc9b9c36..7bb95ecf1ce35 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -45,7 +45,7 @@ from numpy cimport (
 cnp.import_array()
 
 cimport pandas._libs.util as util
-from pandas._libs.dtypes cimport numeric_object_t
+from pandas._libs.dtypes cimport numeric_object_t, numeric_t, iu_64_floating_obj_t
 from pandas._libs.khash cimport (
     kh_destroy_int64,
     kh_get_int64,
@@ -57,7 +57,6 @@ from pandas._libs.khash cimport (
 )
 from pandas._libs.util cimport (
     get_nat,
-    numeric,
 )
 
 import pandas._libs.missing as missing
@@ -240,9 +239,9 @@ def groupsort_indexer(const intp_t[:] index, Py_ssize_t ngroups):
     return indexer.base, counts.base
 
 
-cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil:
+cdef inline Py_ssize_t swap(numeric_t *a, numeric_t *b) nogil:
     cdef:
-        numeric t
+        numeric_t t
 
     # cython doesn't allow pointer dereference so use array syntax
     t = a[0]
@@ -251,7 +250,7 @@ cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil:
     return 0
 
 
-cdef inline numeric kth_smallest_c(numeric* arr, Py_ssize_t k, Py_ssize_t n) nogil:
+cdef inline numeric_t kth_smallest_c(numeric_t* arr, Py_ssize_t k, Py_ssize_t n) nogil:
     """
     See kth_smallest.__doc__. The additional parameter n specifies the maximum
     number of elements considered in arr, needed for compatibility with usage
@@ -259,7 +258,7 @@ cdef inline numeric kth_smallest_c(numeric* arr, Py_ssize_t k, Py_ssize_t n) nog
     """
     cdef:
         Py_ssize_t i, j, l, m
-        numeric x
+        numeric_t x
 
     l = 0
     m = n - 1
@@ -291,7 +290,7 @@ cdef inline numeric kth_smallest_c(numeric* arr, Py_ssize_t k, Py_ssize_t n) nog
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def kth_smallest(numeric[::1] arr, Py_ssize_t k) -> numeric:
+def kth_smallest(numeric_t[::1] arr, Py_ssize_t k) -> numeric_t:
     """
     Compute the kth smallest value in arr. Note that the input
     array will be modified.
@@ -309,7 +308,7 @@ def kth_smallest(numeric[::1] arr, Py_ssize_t k) -> numeric:
         The kth smallest value in arr
     """
     cdef:
-        numeric result
+        numeric_t result
 
     with nogil:
         result = kth_smallest_c(&arr[0], k, arr.shape[0])
@@ -514,20 +513,6 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarr
 
 # ----------------------------------------------------------------------
 
-ctypedef fused algos_t:
-    float64_t
-    float32_t
-    object
-    int64_t
-    int32_t
-    int16_t
-    int8_t
-    uint64_t
-    uint32_t
-    uint16_t
-    uint8_t
-
-
 def validate_limit(nobs: int | None, limit=None) -> int:
     """
     Check that the `limit` argument is a positive integer.
@@ -556,12 +541,12 @@ def validate_limit(nobs: int | None, limit=None) -> int:
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
+def pad(ndarray[numeric_object_t] old, ndarray[numeric_object_t] new, limit=None) -> ndarray:
     # -> ndarray[intp_t, ndim=1]
     cdef:
         Py_ssize_t i, j, nleft, nright
         ndarray[intp_t, ndim=1] indexer
-        algos_t cur, next_val
+        numeric_object_t cur, next_val
         int lim, fill_count = 0
 
     nleft = len(old)
@@ -614,10 +599,10 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def pad_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):
+def pad_inplace(numeric_object_t[:] values, uint8_t[:] mask, limit=None):
     cdef:
         Py_ssize_t i, N
-        algos_t val
+        numeric_object_t val
         uint8_t prev_mask
         int lim, fill_count = 0
 
@@ -646,10 +631,10 @@ def pad_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def pad_2d_inplace(algos_t[:, :] values, const uint8_t[:, :] mask, limit=None):
+def pad_2d_inplace(numeric_object_t[:, :] values, const uint8_t[:, :] mask, limit=None):
     cdef:
         Py_ssize_t i, j, N, K
-        algos_t val
+        numeric_object_t val
         int lim, fill_count = 0
 
     K, N = (<object>values).shape
@@ -702,12 +687,12 @@ D
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
+def backfill(ndarray[numeric_object_t] old, ndarray[numeric_object_t] new, limit=None) -> ndarray:
     # -> ndarray[intp_t, ndim=1]
     cdef:
         Py_ssize_t i, j, nleft, nright
         ndarray[intp_t, ndim=1] indexer
-        algos_t cur, prev
+        numeric_object_t cur, prev
         int lim, fill_count = 0
 
     nleft = len(old)
@@ -759,11 +744,11 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
     return indexer
 
 
-def backfill_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):
+def backfill_inplace(numeric_object_t[:] values, uint8_t[:] mask, limit=None):
     pad_inplace(values[::-1], mask[::-1], limit=limit)
 
 
-def backfill_2d_inplace(algos_t[:, :] values,
+def backfill_2d_inplace(numeric_object_t[:, :] values,
                         const uint8_t[:, :] mask,
                         limit=None):
     pad_2d_inplace(values[:, ::-1], mask[:, ::-1], limit)
@@ -771,7 +756,7 @@ def backfill_2d_inplace(algos_t[:, :] values,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike):
+def is_monotonic(ndarray[numeric_object_t, ndim=1] arr, bint timelike):
     """
     Returns
     -------
@@ -782,7 +767,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike):
     """
     cdef:
         Py_ssize_t i, n
-        algos_t prev, cur
+        numeric_object_t prev, cur
         bint is_monotonic_inc = 1
         bint is_monotonic_dec = 1
         bint is_unique = 1
@@ -802,7 +787,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike):
     if timelike and <int64_t>arr[0] == NPY_NAT:
         return False, False, True
 
-    if algos_t is not object:
+    if numeric_object_t is not object:
         with nogil:
             prev = arr[0]
             for i in range(1, n):
@@ -861,9 +846,9 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike):
 # rank_1d, rank_2d
 # ----------------------------------------------------------------------
 
-cdef numeric_object_t get_rank_nan_fill_val(
+cdef iu_64_floating_obj_t get_rank_nan_fill_val(
         bint rank_nans_highest,
-        numeric_object_t[:] _=None
+        iu_64_floating_obj_t[:] _=None
 ):
     """
     Return the value we'll use to represent missing values when sorting depending
@@ -871,20 +856,20 @@ cdef numeric_object_t get_rank_nan_fill_val(
     is unused, but needed for fused type specialization)
     """
     if rank_nans_highest:
-        if numeric_object_t is object:
+        if iu_64_floating_obj_t is object:
             return Infinity()
-        elif numeric_object_t is int64_t:
+        elif iu_64_floating_obj_t is int64_t:
             return util.INT64_MAX
-        elif numeric_object_t is uint64_t:
+        elif iu_64_floating_obj_t is uint64_t:
             return util.UINT64_MAX
         else:
             return np.inf
     else:
-        if numeric_object_t is object:
+        if iu_64_floating_obj_t is object:
             return NegInfinity()
-        elif numeric_object_t is int64_t:
+        elif iu_64_floating_obj_t is int64_t:
             return NPY_NAT
-        elif numeric_object_t is uint64_t:
+        elif iu_64_floating_obj_t is uint64_t:
             return 0
         else:
             return -np.inf
@@ -893,7 +878,7 @@ cdef numeric_object_t get_rank_nan_fill_val(
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def rank_1d(
-    ndarray[numeric_object_t, ndim=1] values,
+    ndarray[iu_64_floating_obj_t, ndim=1] values,
     const intp_t[:] labels=None,
     bint is_datetimelike=False,
     ties_method="average",
@@ -906,7 +891,7 @@ def rank_1d(
 
     Parameters
     ----------
-    values : array of numeric_object_t values to be ranked
+    values : array of iu_64_floating_obj_t values to be ranked
     labels : np.ndarray[np.intp] or None
         Array containing unique label for each group, with its ordering
         matching up to the corresponding record in `values`. If not called
@@ -936,11 +921,11 @@ def rank_1d(
         int64_t[::1] grp_sizes
         intp_t[:] lexsort_indexer
         float64_t[::1] out
-        ndarray[numeric_object_t, ndim=1] masked_vals
-        numeric_object_t[:] masked_vals_memview
+        ndarray[iu_64_floating_obj_t, ndim=1] masked_vals
+        iu_64_floating_obj_t[:] masked_vals_memview
         uint8_t[:] mask
         bint keep_na, nans_rank_highest, check_labels, check_mask
-        numeric_object_t nan_fill_val
+        iu_64_floating_obj_t nan_fill_val
 
     tiebreak = tiebreakers[ties_method]
     if tiebreak == TIEBREAK_FIRST:
@@ -961,22 +946,22 @@ def rank_1d(
     check_labels = labels is not None
 
     # For cases where a mask is not possible, we can avoid mask checks
-    check_mask = not (numeric_object_t is uint64_t or
-                      (numeric_object_t is int64_t and not is_datetimelike))
+    check_mask = not (iu_64_floating_obj_t is uint64_t or
+                      (iu_64_floating_obj_t is int64_t and not is_datetimelike))
 
     # Copy values into new array in order to fill missing data
     # with mask, without obfuscating location of missing data
     # in values array
-    if numeric_object_t is object and values.dtype != np.object_:
+    if iu_64_floating_obj_t is object and values.dtype != np.object_:
         masked_vals = values.astype('O')
     else:
         masked_vals = values.copy()
 
-    if numeric_object_t is object:
+    if iu_64_floating_obj_t is object:
         mask = missing.isnaobj(masked_vals)
-    elif numeric_object_t is int64_t and is_datetimelike:
+    elif iu_64_floating_obj_t is int64_t and is_datetimelike:
         mask = (masked_vals == NPY_NAT).astype(np.uint8)
-    elif numeric_object_t is float64_t:
+    elif iu_64_floating_obj_t is float64_t:
         mask = np.isnan(masked_vals).astype(np.uint8)
     else:
         mask = np.zeros(shape=len(masked_vals), dtype=np.uint8)
@@ -988,7 +973,7 @@ def rank_1d(
     # will flip the ordering to still end up with lowest rank.
     # Symmetric logic applies to `na_option == 'bottom'`
     nans_rank_highest = ascending ^ (na_option == 'top')
-    nan_fill_val = get_rank_nan_fill_val[numeric_object_t](nans_rank_highest)
+    nan_fill_val = get_rank_nan_fill_val[iu_64_floating_obj_t](nans_rank_highest)
     if nans_rank_highest:
         order = [masked_vals, mask]
     else:
@@ -1035,7 +1020,7 @@ cdef void rank_sorted_1d(
     int64_t[::1] grp_sizes,
     const intp_t[:] sort_indexer,
     # Can make const with cython3 (https://github.com/cython/cython/issues/3222)
-    numeric_object_t[:] masked_vals,
+    iu_64_floating_obj_t[:] masked_vals,
     const uint8_t[:] mask,
     bint check_mask,
     Py_ssize_t N,
@@ -1059,7 +1044,7 @@ cdef void rank_sorted_1d(
         if labels is None.
     sort_indexer : intp_t[:]
         Array of indices which sorts masked_vals
-    masked_vals : numeric_object_t[:]
+    masked_vals : iu_64_floating_obj_t[:]
         The values input to rank_1d, with missing values replaced by fill values
     mask : uint8_t[:]
         Array where entries are True if the value is missing, False otherwise.
@@ -1091,7 +1076,7 @@ cdef void rank_sorted_1d(
     # that sorted value for retrieval back from the original
     # values / masked_vals arrays
     # TODO: de-duplicate once cython supports conditional nogil
-    if numeric_object_t is object:
+    if iu_64_floating_obj_t is object:
         with gil:
             for i in range(N):
                 at_end = i == N - 1
@@ -1299,7 +1284,7 @@ cdef void rank_sorted_1d(
 
 
 def rank_2d(
-    ndarray[numeric_object_t, ndim=2] in_arr,
+    ndarray[iu_64_floating_obj_t, ndim=2] in_arr,
     int axis=0,
     bint is_datetimelike=False,
     ties_method="average",
@@ -1314,13 +1299,13 @@ def rank_2d(
         Py_ssize_t k, n, col
         float64_t[::1, :] out  # Column-major so columns are contiguous
         int64_t[::1] grp_sizes
-        ndarray[numeric_object_t, ndim=2] values
-        numeric_object_t[:, :] masked_vals
+        ndarray[iu_64_floating_obj_t, ndim=2] values
+        iu_64_floating_obj_t[:, :] masked_vals
         intp_t[:, :] sort_indexer
         uint8_t[:, :] mask
         TiebreakEnumType tiebreak
         bint check_mask, keep_na, nans_rank_highest
-        numeric_object_t nan_fill_val
+        iu_64_floating_obj_t nan_fill_val
 
     tiebreak = tiebreakers[ties_method]
     if tiebreak == TIEBREAK_FIRST:
@@ -1330,25 +1315,25 @@ def rank_2d(
     keep_na = na_option == 'keep'
 
     # For cases where a mask is not possible, we can avoid mask checks
-    check_mask = not (numeric_object_t is uint64_t or
-                      (numeric_object_t is int64_t and not is_datetimelike))
+    check_mask = not (iu_64_floating_obj_t is uint64_t or
+                      (iu_64_floating_obj_t is int64_t and not is_datetimelike))
 
     if axis == 1:
         values = np.asarray(in_arr).T.copy()
     else:
         values = np.asarray(in_arr).copy()
 
-    if numeric_object_t is object:
+    if iu_64_floating_obj_t is object:
         if values.dtype != np.object_:
             values = values.astype('O')
 
     nans_rank_highest = ascending ^ (na_option == 'top')
     if check_mask:
-        nan_fill_val = get_rank_nan_fill_val[numeric_object_t](nans_rank_highest)
+        nan_fill_val = get_rank_nan_fill_val[iu_64_floating_obj_t](nans_rank_highest)
 
-        if numeric_object_t is object:
+        if iu_64_floating_obj_t is object:
             mask = missing.isnaobj2d(values).view(np.uint8)
-        elif numeric_object_t is float64_t:
+        elif iu_64_floating_obj_t is float64_t:
             mask = np.isnan(values).view(np.uint8)
 
         # int64 and datetimelike
diff --git a/pandas/_libs/dtypes.pxd b/pandas/_libs/dtypes.pxd
index ef95b8aab6e70..ad579ada7417a 100644
--- a/pandas/_libs/dtypes.pxd
+++ b/pandas/_libs/dtypes.pxd
@@ -5,13 +5,40 @@ Common location for shared fused types
 from numpy cimport (
     float32_t,
     float64_t,
+    int8_t,
+    int16_t,
+    int32_t,
     int64_t,
+    uint8_t,
+    uint16_t,
+    uint32_t,
     uint64_t,
 )
 
+ctypedef fused numeric_t:
+    int8_t
+    int16_t
+    int32_t
+    int64_t
+
+    uint8_t
+    uint16_t
+    uint32_t
+    uint64_t
+
+    float32_t
+    float64_t
+
 ctypedef fused numeric_object_t:
+    numeric_t
+    object
+
+ctypedef fused iu_64_floating_t:
     float64_t
     float32_t
     int64_t
     uint64_t
+
+ctypedef fused iu_64_floating_obj_t:
+    iu_64_floating_t
     object
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index bbdc5a8287502..6988c2cf7f28e 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -33,7 +33,6 @@ cnp.import_array()
 from pandas._libs.algos cimport kth_smallest_c
 from pandas._libs.util cimport (
     get_nat,
-    numeric,
 )
 
 from pandas._libs.algos import (
@@ -43,7 +42,7 @@ from pandas._libs.algos import (
     take_2d_axis1_float64_float64,
 )
 
-from pandas._libs.dtypes cimport numeric_object_t
+from pandas._libs.dtypes cimport iu_64_floating_obj_t, iu_64_floating_t, numeric_t
 from pandas._libs.missing cimport checknull
 
 
@@ -201,8 +200,8 @@ def group_cumprod_float64(float64_t[:, ::1] out,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_cumsum(numeric[:, ::1] out,
-                 ndarray[numeric, ndim=2] values,
+def group_cumsum(numeric_t[:, ::1] out,
+                 ndarray[numeric_t, ndim=2] values,
                  const intp_t[::1] labels,
                  int ngroups,
                  is_datetimelike,
@@ -231,8 +230,8 @@ def group_cumsum(numeric[:, ::1] out,
     """
     cdef:
         Py_ssize_t i, j, N, K, size
-        numeric val, y, t
-        numeric[:, ::1] accum, compensation
+        numeric_t val, y, t
+        numeric_t[:, ::1] accum, compensation
         intp_t lab
 
     N, K = (<object>values).shape
@@ -250,7 +249,7 @@ def group_cumsum(numeric[:, ::1] out,
 
                 # For floats, use Kahan summation to reduce floating-point
                 # error (https://en.wikipedia.org/wiki/Kahan_summation_algorithm)
-                if numeric == float32_t or numeric == float64_t:
+                if numeric_t == float32_t or numeric_t == float64_t:
                     if val == val:
                         y = val - compensation[lab, j]
                         t = accum[lab, j] + y
@@ -806,7 +805,7 @@ def group_ohlc(floating[:, ::1] out,
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_quantile(ndarray[float64_t, ndim=2] out,
-                   ndarray[numeric, ndim=1] values,
+                   ndarray[numeric_t, ndim=1] values,
                    ndarray[intp_t] labels,
                    ndarray[uint8_t] mask,
                    const intp_t[:] sort_indexer,
@@ -922,15 +921,15 @@ def group_quantile(ndarray[float64_t, ndim=2] out,
 # group_nth, group_last, group_rank
 # ----------------------------------------------------------------------
 
-cdef inline bint _treat_as_na(numeric_object_t val, bint is_datetimelike) nogil:
-    if numeric_object_t is object:
+cdef inline bint _treat_as_na(iu_64_floating_obj_t val, bint is_datetimelike) nogil:
+    if iu_64_floating_obj_t is object:
         # Should never be used, but we need to avoid the `val != val` below
         #  or else cython will raise about gil acquisition.
         raise NotImplementedError
 
-    elif numeric_object_t is int64_t:
+    elif iu_64_floating_obj_t is int64_t:
         return is_datetimelike and val == NPY_NAT
-    elif numeric_object_t is uint64_t:
+    elif iu_64_floating_obj_t is uint64_t:
         # There is no NA value for uint64
         return False
     else:
@@ -938,12 +937,12 @@ cdef inline bint _treat_as_na(numeric_object_t val, bint is_datetimelike) nogil:
 
 
 # GH#31710 use memorviews once cython 0.30 is released so we can
-#  use `const numeric_object_t[:, :] values`
+#  use `const iu_64_floating_obj_t[:, :] values`
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_last(numeric_object_t[:, ::1] out,
+def group_last(iu_64_floating_obj_t[:, ::1] out,
                int64_t[::1] counts,
-               ndarray[numeric_object_t, ndim=2] values,
+               ndarray[iu_64_floating_obj_t, ndim=2] values,
                const intp_t[::1] labels,
                Py_ssize_t min_count=-1) -> None:
     """
@@ -951,8 +950,8 @@ def group_last(numeric_object_t[:, ::1] out,
     """
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
-        numeric_object_t val
-        ndarray[numeric_object_t, ndim=2] resx
+        iu_64_floating_obj_t val
+        ndarray[iu_64_floating_obj_t, ndim=2] resx
         ndarray[int64_t, ndim=2] nobs
         bint runtime_error = False
 
@@ -963,14 +962,14 @@ def group_last(numeric_object_t[:, ::1] out,
 
     min_count = max(min_count, 1)
     nobs = np.zeros((<object>out).shape, dtype=np.int64)
-    if numeric_object_t is object:
+    if iu_64_floating_obj_t is object:
         resx = np.empty((<object>out).shape, dtype=object)
     else:
         resx = np.empty_like(out)
 
     N, K = (<object>values).shape
 
-    if numeric_object_t is object:
+    if iu_64_floating_obj_t is object:
         # TODO: De-duplicate once conditional-nogil is available
         for i in range(N):
             lab = labels[i]
@@ -1012,9 +1011,9 @@ def group_last(numeric_object_t[:, ::1] out,
             for i in range(ncounts):
                 for j in range(K):
                     if nobs[i, j] < min_count:
-                        if numeric_object_t is int64_t:
+                        if iu_64_floating_obj_t is int64_t:
                             out[i, j] = NPY_NAT
-                        elif numeric_object_t is uint64_t:
+                        elif iu_64_floating_obj_t is uint64_t:
                             runtime_error = True
                             break
                         else:
@@ -1030,12 +1029,12 @@ def group_last(numeric_object_t[:, ::1] out,
 
 
 # GH#31710 use memorviews once cython 0.30 is released so we can
-#  use `const numeric_object_t[:, :] values`
+#  use `const iu_64_floating_obj_t[:, :] values`
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_nth(numeric_object_t[:, ::1] out,
+def group_nth(iu_64_floating_obj_t[:, ::1] out,
               int64_t[::1] counts,
-              ndarray[numeric_object_t, ndim=2] values,
+              ndarray[iu_64_floating_obj_t, ndim=2] values,
               const intp_t[::1] labels,
               int64_t min_count=-1,
               int64_t rank=1,
@@ -1045,8 +1044,8 @@ def group_nth(numeric_object_t[:, ::1] out,
     """
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
-        numeric_object_t val
-        ndarray[numeric_object_t, ndim=2] resx
+        iu_64_floating_obj_t val
+        ndarray[iu_64_floating_obj_t, ndim=2] resx
         ndarray[int64_t, ndim=2] nobs
         bint runtime_error = False
 
@@ -1057,14 +1056,14 @@ def group_nth(numeric_object_t[:, ::1] out,
 
     min_count = max(min_count, 1)
     nobs = np.zeros((<object>out).shape, dtype=np.int64)
-    if numeric_object_t is object:
+    if iu_64_floating_obj_t is object:
         resx = np.empty((<object>out).shape, dtype=object)
     else:
         resx = np.empty_like(out)
 
     N, K = (<object>values).shape
 
-    if numeric_object_t is object:
+    if iu_64_floating_obj_t is object:
         # TODO: De-duplicate once conditional-nogil is available
         for i in range(N):
             lab = labels[i]
@@ -1109,9 +1108,9 @@ def group_nth(numeric_object_t[:, ::1] out,
             for i in range(ncounts):
                 for j in range(K):
                     if nobs[i, j] < min_count:
-                        if numeric_object_t is int64_t:
+                        if iu_64_floating_obj_t is int64_t:
                             out[i, j] = NPY_NAT
-                        elif numeric_object_t is uint64_t:
+                        elif iu_64_floating_obj_t is uint64_t:
                             runtime_error = True
                             break
                         else:
@@ -1128,7 +1127,7 @@ def group_nth(numeric_object_t[:, ::1] out,
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_rank(float64_t[:, ::1] out,
-               ndarray[numeric_object_t, ndim=2] values,
+               ndarray[iu_64_floating_obj_t, ndim=2] values,
                const intp_t[::1] labels,
                int ngroups,
                bint is_datetimelike, str ties_method="average",
@@ -1140,7 +1139,7 @@ def group_rank(float64_t[:, ::1] out,
     ----------
     out : np.ndarray[np.float64, ndim=2]
         Values to which this method will write its results.
-    values : np.ndarray of numeric_object_t values to be ranked
+    values : np.ndarray of iu_64_floating_obj_t values to be ranked
     labels : np.ndarray[np.intp]
         Array containing unique label for each group, with its ordering
         matching up to the corresponding record in `values`
@@ -1195,18 +1194,12 @@ def group_rank(float64_t[:, ::1] out,
 # ----------------------------------------------------------------------
 
 # TODO: consider implementing for more dtypes
-ctypedef fused groupby_t:
-    float64_t
-    float32_t
-    int64_t
-    uint64_t
-
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-cdef group_min_max(groupby_t[:, ::1] out,
+cdef group_min_max(iu_64_floating_t[:, ::1] out,
                    int64_t[::1] counts,
-                   ndarray[groupby_t, ndim=2] values,
+                   ndarray[iu_64_floating_t, ndim=2] values,
                    const intp_t[::1] labels,
                    Py_ssize_t min_count=-1,
                    bint is_datetimelike=False,
@@ -1218,7 +1211,7 @@ cdef group_min_max(groupby_t[:, ::1] out,
 
     Parameters
     ----------
-    out : np.ndarray[groupby_t, ndim=2]
+    out : np.ndarray[iu_64_floating_t, ndim=2]
         Array to store result in.
     counts : np.ndarray[int64]
         Input as a zeroed array, populated by group sizes during algorithm
@@ -1247,8 +1240,8 @@ cdef group_min_max(groupby_t[:, ::1] out,
     """
     cdef:
         Py_ssize_t i, j, N, K, lab, ngroups = len(counts)
-        groupby_t val, nan_val
-        ndarray[groupby_t, ndim=2] group_min_or_max
+        iu_64_floating_t val, nan_val
+        ndarray[iu_64_floating_t, ndim=2] group_min_or_max
         bint runtime_error = False
         int64_t[:, ::1] nobs
         bint uses_mask = mask is not None
@@ -1263,10 +1256,10 @@ cdef group_min_max(groupby_t[:, ::1] out,
     nobs = np.zeros((<object>out).shape, dtype=np.int64)
 
     group_min_or_max = np.empty_like(out)
-    if groupby_t is int64_t:
+    if iu_64_floating_t is int64_t:
         group_min_or_max[:] = -_int64_max if compute_max else _int64_max
         nan_val = NPY_NAT
-    elif groupby_t is uint64_t:
+    elif iu_64_floating_t is uint64_t:
         # NB: We do not define nan_val because there is no such thing
         # for uint64_t.  We carefully avoid having to reference it in this
         # case.
@@ -1304,7 +1297,7 @@ cdef group_min_max(groupby_t[:, ::1] out,
         for i in range(ngroups):
             for j in range(K):
                 if nobs[i, j] < min_count:
-                    if groupby_t is uint64_t:
+                    if iu_64_floating_t is uint64_t:
                         runtime_error = True
                         break
                     else:
@@ -1323,9 +1316,9 @@ cdef group_min_max(groupby_t[:, ::1] out,
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_max(groupby_t[:, ::1] out,
+def group_max(iu_64_floating_t[:, ::1] out,
               int64_t[::1] counts,
-              ndarray[groupby_t, ndim=2] values,
+              ndarray[iu_64_floating_t, ndim=2] values,
               const intp_t[::1] labels,
               Py_ssize_t min_count=-1,
               bint is_datetimelike=False,
@@ -1347,9 +1340,9 @@ def group_max(groupby_t[:, ::1] out,
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_min(groupby_t[:, ::1] out,
+def group_min(iu_64_floating_t[:, ::1] out,
               int64_t[::1] counts,
-              ndarray[groupby_t, ndim=2] values,
+              ndarray[iu_64_floating_t, ndim=2] values,
               const intp_t[::1] labels,
               Py_ssize_t min_count=-1,
               bint is_datetimelike=False,
@@ -1371,8 +1364,8 @@ def group_min(groupby_t[:, ::1] out,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cdef group_cummin_max(groupby_t[:, ::1] out,
-                      ndarray[groupby_t, ndim=2] values,
+cdef group_cummin_max(iu_64_floating_t[:, ::1] out,
+                      ndarray[iu_64_floating_t, ndim=2] values,
                       uint8_t[:, ::1] mask,
                       const intp_t[::1] labels,
                       int ngroups,
@@ -1384,9 +1377,9 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
 
     Parameters
     ----------
-    out : np.ndarray[groupby_t, ndim=2]
+    out : np.ndarray[iu_64_floating_t, ndim=2]
         Array to store cummin/max in.
-    values : np.ndarray[groupby_t, ndim=2]
+    values : np.ndarray[iu_64_floating_t, ndim=2]
         Values to take cummin/max of.
     mask : np.ndarray[bool] or None
         If not None, indices represent missing values,
@@ -1408,12 +1401,12 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
     This method modifies the `out` parameter, rather than returning an object.
     """
     cdef:
-        groupby_t[:, ::1] accum
+        iu_64_floating_t[:, ::1] accum
 
     accum = np.empty((ngroups, (<object>values).shape[1]), dtype=values.dtype)
-    if groupby_t is int64_t:
+    if iu_64_floating_t is int64_t:
         accum[:] = -_int64_max if compute_max else _int64_max
-    elif groupby_t is uint64_t:
+    elif iu_64_floating_t is uint64_t:
         accum[:] = 0 if compute_max else np.iinfo(np.uint64).max
     else:
         accum[:] = -np.inf if compute_max else np.inf
@@ -1426,10 +1419,10 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cdef cummin_max(groupby_t[:, ::1] out,
-                ndarray[groupby_t, ndim=2] values,
+cdef cummin_max(iu_64_floating_t[:, ::1] out,
+                ndarray[iu_64_floating_t, ndim=2] values,
                 const intp_t[::1] labels,
-                groupby_t[:, ::1] accum,
+                iu_64_floating_t[:, ::1] accum,
                 bint skipna,
                 bint is_datetimelike,
                 bint compute_max):
@@ -1439,12 +1432,12 @@ cdef cummin_max(groupby_t[:, ::1] out,
     """
     cdef:
         Py_ssize_t i, j, N, K
-        groupby_t val, mval, na_val
+        iu_64_floating_t val, mval, na_val
         uint8_t[:, ::1] seen_na
         intp_t lab
         bint na_possible
 
-    if groupby_t is float64_t or groupby_t is float32_t:
+    if iu_64_floating_t is float64_t or iu_64_floating_t is float32_t:
         na_val = NaN
         na_possible = True
     elif is_datetimelike:
@@ -1485,11 +1478,11 @@ cdef cummin_max(groupby_t[:, ::1] out,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cdef masked_cummin_max(groupby_t[:, ::1] out,
-                       ndarray[groupby_t, ndim=2] values,
+cdef masked_cummin_max(iu_64_floating_t[:, ::1] out,
+                       ndarray[iu_64_floating_t, ndim=2] values,
                        uint8_t[:, ::1] mask,
                        const intp_t[::1] labels,
-                       groupby_t[:, ::1] accum,
+                       iu_64_floating_t[:, ::1] accum,
                        bint skipna,
                        bint compute_max):
     """
@@ -1498,7 +1491,7 @@ cdef masked_cummin_max(groupby_t[:, ::1] out,
     """
     cdef:
         Py_ssize_t i, j, N, K
-        groupby_t val, mval
+        iu_64_floating_t val, mval
         uint8_t[:, ::1] seen_na
         intp_t lab
 
@@ -1529,8 +1522,8 @@ cdef masked_cummin_max(groupby_t[:, ::1] out,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_cummin(groupby_t[:, ::1] out,
-                 ndarray[groupby_t, ndim=2] values,
+def group_cummin(iu_64_floating_t[:, ::1] out,
+                 ndarray[iu_64_floating_t, ndim=2] values,
                  const intp_t[::1] labels,
                  int ngroups,
                  bint is_datetimelike,
@@ -1551,8 +1544,8 @@ def group_cummin(groupby_t[:, ::1] out,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_cummax(groupby_t[:, ::1] out,
-                 ndarray[groupby_t, ndim=2] values,
+def group_cummax(iu_64_floating_t[:, ::1] out,
+                 ndarray[iu_64_floating_t, ndim=2] values,
                  const intp_t[::1] labels,
                  int ngroups,
                  bint is_datetimelike,
diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx
index b6acf8914c0a6..286a1a189db4c 100644
--- a/pandas/_libs/join.pyx
+++ b/pandas/_libs/join.pyx
@@ -4,23 +4,16 @@ import numpy as np
 
 cimport numpy as cnp
 from numpy cimport (
-    float32_t,
-    float64_t,
-    int8_t,
-    int16_t,
-    int32_t,
     int64_t,
     intp_t,
     ndarray,
-    uint8_t,
-    uint16_t,
-    uint32_t,
     uint64_t,
 )
 
 cnp.import_array()
 
 from pandas._libs.algos import groupsort_indexer
+from pandas._libs.dtypes cimport numeric_object_t, numeric_t
 
 
 @cython.wraparound(False)
@@ -257,31 +250,17 @@ def ffill_indexer(const intp_t[:] indexer) -> np.ndarray:
 # left_join_indexer, inner_join_indexer, outer_join_indexer
 # ----------------------------------------------------------------------
 
-ctypedef fused join_t:
-    float64_t
-    float32_t
-    object
-    int8_t
-    int16_t
-    int32_t
-    int64_t
-    uint8_t
-    uint16_t
-    uint32_t
-    uint64_t
-
-
 # Joins on ordered, unique indices
 
 # right might contain non-unique values
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def left_join_indexer_unique(ndarray[join_t] left, ndarray[join_t] right):
+def left_join_indexer_unique(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right):
     cdef:
         Py_ssize_t i, j, nleft, nright
         ndarray[intp_t] indexer
-        join_t lval, rval
+        numeric_object_t lval, rval
 
     i = 0
     j = 0
@@ -322,15 +301,15 @@ def left_join_indexer_unique(ndarray[join_t] left, ndarray[join_t] right):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def left_join_indexer(ndarray[join_t] left, ndarray[join_t] right):
+def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right):
     """
     Two-pass algorithm for monotonic indexes. Handles many-to-one merges.
     """
     cdef:
         Py_ssize_t i, j, k, nright, nleft, count
-        join_t lval, rval
+        numeric_object_t lval, rval
         ndarray[intp_t] lindexer, rindexer
-        ndarray[join_t] result
+        ndarray[numeric_object_t] result
 
     nleft = len(left)
     nright = len(right)
@@ -425,15 +404,15 @@ def left_join_indexer(ndarray[join_t] left, ndarray[join_t] right):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def inner_join_indexer(ndarray[join_t] left, ndarray[join_t] right):
+def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right):
     """
     Two-pass algorithm for monotonic indexes. Handles many-to-one merges.
     """
     cdef:
         Py_ssize_t i, j, k, nright, nleft, count
-        join_t lval, rval
+        numeric_object_t lval, rval
         ndarray[intp_t] lindexer, rindexer
-        ndarray[join_t] result
+        ndarray[numeric_object_t] result
 
     nleft = len(left)
     nright = len(right)
@@ -518,12 +497,12 @@ def inner_join_indexer(ndarray[join_t] left, ndarray[join_t] right):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def outer_join_indexer(ndarray[join_t] left, ndarray[join_t] right):
+def outer_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right):
     cdef:
         Py_ssize_t i, j, nright, nleft, count
-        join_t lval, rval
+        numeric_object_t lval, rval
         ndarray[intp_t] lindexer, rindexer
-        ndarray[join_t] result
+        ndarray[numeric_object_t] result
 
     nleft = len(left)
     nright = len(right)
@@ -656,26 +635,14 @@ from pandas._libs.hashtable cimport (
     UInt64HashTable,
 )
 
-ctypedef fused asof_t:
-    uint8_t
-    uint16_t
-    uint32_t
-    uint64_t
-    int8_t
-    int16_t
-    int32_t
-    int64_t
-    float
-    float64_t
-
 ctypedef fused by_t:
     object
     int64_t
     uint64_t
 
 
-def asof_join_backward_on_X_by_Y(asof_t[:] left_values,
-                                 asof_t[:] right_values,
+def asof_join_backward_on_X_by_Y(numeric_t[:] left_values,
+                                 numeric_t[:] right_values,
                                  by_t[:] left_by_values,
                                  by_t[:] right_by_values,
                                  bint allow_exact_matches=True,
@@ -685,8 +652,8 @@ def asof_join_backward_on_X_by_Y(asof_t[:] left_values,
         Py_ssize_t left_pos, right_pos, left_size, right_size, found_right_pos
         ndarray[intp_t] left_indexer, right_indexer
         bint has_tolerance = False
-        asof_t tolerance_ = 0
-        asof_t diff = 0
+        numeric_t tolerance_ = 0
+        numeric_t diff = 0
         HashTable hash_table
         by_t by_value
 
@@ -743,8 +710,8 @@ def asof_join_backward_on_X_by_Y(asof_t[:] left_values,
     return left_indexer, right_indexer
 
 
-def asof_join_forward_on_X_by_Y(asof_t[:] left_values,
-                                asof_t[:] right_values,
+def asof_join_forward_on_X_by_Y(numeric_t[:] left_values,
+                                numeric_t[:] right_values,
                                 by_t[:] left_by_values,
                                 by_t[:] right_by_values,
                                 bint allow_exact_matches=1,
@@ -754,8 +721,8 @@ def asof_join_forward_on_X_by_Y(asof_t[:] left_values,
         Py_ssize_t left_pos, right_pos, left_size, right_size, found_right_pos
         ndarray[intp_t] left_indexer, right_indexer
         bint has_tolerance = False
-        asof_t tolerance_ = 0
-        asof_t diff = 0
+        numeric_t tolerance_ = 0
+        numeric_t diff = 0
         HashTable hash_table
         by_t by_value
 
@@ -812,8 +779,8 @@ def asof_join_forward_on_X_by_Y(asof_t[:] left_values,
     return left_indexer, right_indexer
 
 
-def asof_join_nearest_on_X_by_Y(asof_t[:] left_values,
-                                asof_t[:] right_values,
+def asof_join_nearest_on_X_by_Y(numeric_t[:] left_values,
+                                numeric_t[:] right_values,
                                 by_t[:] left_by_values,
                                 by_t[:] right_by_values,
                                 bint allow_exact_matches=True,
@@ -822,7 +789,7 @@ def asof_join_nearest_on_X_by_Y(asof_t[:] left_values,
     cdef:
         Py_ssize_t left_size, right_size, i
         ndarray[intp_t] left_indexer, right_indexer, bli, bri, fli, fri
-        asof_t bdiff, fdiff
+        numeric_t bdiff, fdiff
 
     left_size = len(left_values)
     right_size = len(right_values)
@@ -865,8 +832,8 @@ def asof_join_nearest_on_X_by_Y(asof_t[:] left_values,
 # asof_join
 # ----------------------------------------------------------------------
 
-def asof_join_backward(asof_t[:] left_values,
-                       asof_t[:] right_values,
+def asof_join_backward(numeric_t[:] left_values,
+                       numeric_t[:] right_values,
                        bint allow_exact_matches=True,
                        tolerance=None):
 
@@ -874,8 +841,8 @@ def asof_join_backward(asof_t[:] left_values,
         Py_ssize_t left_pos, right_pos, left_size, right_size
         ndarray[intp_t] left_indexer, right_indexer
         bint has_tolerance = False
-        asof_t tolerance_ = 0
-        asof_t diff = 0
+        numeric_t tolerance_ = 0
+        numeric_t diff = 0
 
     # if we are using tolerance, set our objects
     if tolerance is not None:
@@ -918,8 +885,8 @@ def asof_join_backward(asof_t[:] left_values,
     return left_indexer, right_indexer
 
 
-def asof_join_forward(asof_t[:] left_values,
-                      asof_t[:] right_values,
+def asof_join_forward(numeric_t[:] left_values,
+                      numeric_t[:] right_values,
                       bint allow_exact_matches=True,
                       tolerance=None):
 
@@ -927,8 +894,8 @@ def asof_join_forward(asof_t[:] left_values,
         Py_ssize_t left_pos, right_pos, left_size, right_size
         ndarray[intp_t] left_indexer, right_indexer
         bint has_tolerance = False
-        asof_t tolerance_ = 0
-        asof_t diff = 0
+        numeric_t tolerance_ = 0
+        numeric_t diff = 0
 
     # if we are using tolerance, set our objects
     if tolerance is not None:
@@ -972,15 +939,15 @@ def asof_join_forward(asof_t[:] left_values,
     return left_indexer, right_indexer
 
 
-def asof_join_nearest(asof_t[:] left_values,
-                      asof_t[:] right_values,
+def asof_join_nearest(numeric_t[:] left_values,
+                      numeric_t[:] right_values,
                       bint allow_exact_matches=True,
                       tolerance=None):
 
     cdef:
         Py_ssize_t left_size, right_size, i
         ndarray[intp_t] left_indexer, right_indexer, bli, bri, fli, fri
-        asof_t bdiff, fdiff
+        numeric_t bdiff, fdiff
 
     left_size = len(left_values)
     right_size = len(right_values)
diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx
index 959d83a55d4f3..45e9da52c0663 100644
--- a/pandas/_libs/reshape.pyx
+++ b/pandas/_libs/reshape.pyx
@@ -2,17 +2,9 @@ import cython
 from cython import Py_ssize_t
 
 from numpy cimport (
-    float32_t,
-    float64_t,
-    int8_t,
-    int16_t,
-    int32_t,
     int64_t,
     ndarray,
     uint8_t,
-    uint16_t,
-    uint32_t,
-    uint64_t,
 )
 
 import numpy as np
@@ -22,26 +14,14 @@ cimport numpy as cnp
 cnp.import_array()
 
 from pandas._libs.lib cimport c_is_list_like
-
-ctypedef fused reshape_t:
-    uint8_t
-    uint16_t
-    uint32_t
-    uint64_t
-    int8_t
-    int16_t
-    int32_t
-    int64_t
-    float32_t
-    float64_t
-    object
+from pandas._libs.dtypes cimport numeric_object_t
 
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def unstack(reshape_t[:, :] values, const uint8_t[:] mask,
+def unstack(numeric_object_t[:, :] values, const uint8_t[:] mask,
             Py_ssize_t stride, Py_ssize_t length, Py_ssize_t width,
-            reshape_t[:, :] new_values, uint8_t[:, :] new_mask) -> None:
+            numeric_object_t[:, :] new_values, uint8_t[:, :] new_mask) -> None:
     """
     Transform long values to wide new_values.
 
@@ -60,7 +40,7 @@ def unstack(reshape_t[:, :] values, const uint8_t[:] mask,
     cdef:
         Py_ssize_t i, j, w, nulls, s, offset
 
-    if reshape_t is not object:
+    if numeric_object_t is not object:
         # evaluated at compile-time
         with nogil:
             for i in range(stride):
diff --git a/pandas/_libs/util.pxd b/pandas/_libs/util.pxd
index be22fc368c28f..df88c896ac593 100644
--- a/pandas/_libs/util.pxd
+++ b/pandas/_libs/util.pxd
@@ -16,18 +16,3 @@ cdef extern from "src/headers/stdint.h":
     enum: INT32_MIN
     enum: INT64_MAX
     enum: INT64_MIN
-
-
-ctypedef fused numeric:
-    cnp.int8_t
-    cnp.int16_t
-    cnp.int32_t
-    cnp.int64_t
-
-    cnp.uint8_t
-    cnp.uint16_t
-    cnp.uint32_t
-    cnp.uint64_t
-
-    cnp.float32_t
-    cnp.float64_t
diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
index ea52bd24a3689..4fdc9ad393470 100644
--- a/pandas/_libs/window/aggregations.pyx
+++ b/pandas/_libs/window/aggregations.pyx
@@ -28,7 +28,7 @@ cdef extern from "src/headers/cmath" namespace "std":
 
 from pandas._libs.algos import is_monotonic
 
-from pandas._libs.util cimport numeric
+from pandas._libs.dtypes cimport numeric_t
 
 
 cdef extern from "../src/skiplist.h":
@@ -851,18 +851,18 @@ def roll_median_c(const float64_t[:] values, ndarray[int64_t] start,
 # https://github.com/pydata/bottleneck
 
 
-cdef inline numeric init_mm(numeric ai, Py_ssize_t *nobs, bint is_max) nogil:
+cdef inline numeric_t init_mm(numeric_t ai, Py_ssize_t *nobs, bint is_max) nogil:
 
-    if numeric in cython.floating:
+    if numeric_t in cython.floating:
         if ai == ai:
             nobs[0] = nobs[0] + 1
         elif is_max:
-            if numeric == cython.float:
+            if numeric_t == cython.float:
                 ai = MINfloat32
             else:
                 ai = MINfloat64
         else:
-            if numeric == cython.float:
+            if numeric_t == cython.float:
                 ai = MAXfloat32
             else:
                 ai = MAXfloat64
@@ -873,18 +873,18 @@ cdef inline numeric init_mm(numeric ai, Py_ssize_t *nobs, bint is_max) nogil:
     return ai
 
 
-cdef inline void remove_mm(numeric aold, Py_ssize_t *nobs) nogil:
+cdef inline void remove_mm(numeric_t aold, Py_ssize_t *nobs) nogil:
     """ remove a value from the mm calc """
-    if numeric in cython.floating and aold == aold:
+    if numeric_t in cython.floating and aold == aold:
         nobs[0] = nobs[0] - 1
 
 
-cdef inline numeric calc_mm(int64_t minp, Py_ssize_t nobs,
-                            numeric value) nogil:
+cdef inline numeric_t calc_mm(int64_t minp, Py_ssize_t nobs,
+                            numeric_t value) nogil:
     cdef:
-        numeric result
+        numeric_t result
 
-    if numeric in cython.floating:
+    if numeric_t in cython.floating:
         if nobs >= minp:
             result = value
         else:
@@ -940,13 +940,13 @@ def roll_min(ndarray[float64_t] values, ndarray[int64_t] start,
     return _roll_min_max(values, start, end, minp, is_max=0)
 
 
-cdef _roll_min_max(ndarray[numeric] values,
+cdef _roll_min_max(ndarray[numeric_t ] values,
                    ndarray[int64_t] starti,
                    ndarray[int64_t] endi,
                    int64_t minp,
                    bint is_max):
     cdef:
-        numeric ai
+        numeric_t ai
         int64_t curr_win_size, start
         Py_ssize_t i, k, nobs = 0, N = len(values)
         deque Q[int64_t]  # min/max always the front

From 0eef472594b9432f558b7db5394a1e1d1bbfe1a3 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Thu, 30 Sep 2021 16:25:49 -0400
Subject: [PATCH 2/4] Fix compilation issue

---
 pandas/_libs/window/aggregations.pyx | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
index 4fdc9ad393470..29fe20090875b 100644
--- a/pandas/_libs/window/aggregations.pyx
+++ b/pandas/_libs/window/aggregations.pyx
@@ -20,14 +20,13 @@ from numpy cimport (
 cnp.import_array()
 
 
-cdef extern from "src/headers/cmath" namespace "std":
+cdef extern from "../src/headers/cmath" namespace "std":
     bint isnan(float64_t) nogil
     bint notnan(float64_t) nogil
     int signbit(float64_t) nogil
     float64_t sqrt(float64_t x) nogil
 
 from pandas._libs.algos import is_monotonic
-
 from pandas._libs.dtypes cimport numeric_t
 
 
@@ -880,7 +879,7 @@ cdef inline void remove_mm(numeric_t aold, Py_ssize_t *nobs) nogil:
 
 
 cdef inline numeric_t calc_mm(int64_t minp, Py_ssize_t nobs,
-                            numeric_t value) nogil:
+                              numeric_t value) nogil:
     cdef:
         numeric_t result
 
@@ -940,7 +939,7 @@ def roll_min(ndarray[float64_t] values, ndarray[int64_t] start,
     return _roll_min_max(values, start, end, minp, is_max=0)
 
 
-cdef _roll_min_max(ndarray[numeric_t ] values,
+cdef _roll_min_max(ndarray[numeric_t] values,
                    ndarray[int64_t] starti,
                    ndarray[int64_t] endi,
                    int64_t minp,

From 4302a3ccaa306d66b4be786328234d9a00d3ae78 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Thu, 30 Sep 2021 18:18:21 -0400
Subject: [PATCH 3/4] precommit fixup

---
 pandas/_libs/algos.pyx   | 22 ++++++++++++++++------
 pandas/_libs/groupby.pyx | 10 ++++++----
 pandas/_libs/join.pyx    | 11 +++++++++--
 pandas/_libs/reshape.pyx |  2 +-
 4 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 7bb95ecf1ce35..82f9280870d59 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -45,7 +45,11 @@ from numpy cimport (
 cnp.import_array()
 
 cimport pandas._libs.util as util
-from pandas._libs.dtypes cimport numeric_object_t, numeric_t, iu_64_floating_obj_t
+from pandas._libs.dtypes cimport (
+    iu_64_floating_obj_t,
+    numeric_object_t,
+    numeric_t,
+)
 from pandas._libs.khash cimport (
     kh_destroy_int64,
     kh_get_int64,
@@ -55,9 +59,7 @@ from pandas._libs.khash cimport (
     kh_resize_int64,
     khiter_t,
 )
-from pandas._libs.util cimport (
-    get_nat,
-)
+from pandas._libs.util cimport get_nat
 
 import pandas._libs.missing as missing
 
@@ -541,7 +543,11 @@ def validate_limit(nobs: int | None, limit=None) -> int:
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def pad(ndarray[numeric_object_t] old, ndarray[numeric_object_t] new, limit=None) -> ndarray:
+def pad(
+    ndarray[numeric_object_t] old,
+    ndarray[numeric_object_t] new,
+    limit=None
+) -> ndarray:
     # -> ndarray[intp_t, ndim=1]
     cdef:
         Py_ssize_t i, j, nleft, nright
@@ -687,7 +693,11 @@ D
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def backfill(ndarray[numeric_object_t] old, ndarray[numeric_object_t] new, limit=None) -> ndarray:
+def backfill(
+    ndarray[numeric_object_t] old,
+    ndarray[numeric_object_t] new,
+    limit=None
+) -> ndarray:
     # -> ndarray[intp_t, ndim=1]
     cdef:
         Py_ssize_t i, j, nleft, nright
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index ab47fbca3f0dc..1e05ef443d516 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -31,9 +31,7 @@ from numpy.math cimport NAN
 cnp.import_array()
 
 from pandas._libs.algos cimport kth_smallest_c
-from pandas._libs.util cimport (
-    get_nat,
-)
+from pandas._libs.util cimport get_nat
 
 from pandas._libs.algos import (
     ensure_platform_int,
@@ -42,7 +40,11 @@ from pandas._libs.algos import (
     take_2d_axis1_float64_float64,
 )
 
-from pandas._libs.dtypes cimport iu_64_floating_obj_t, iu_64_floating_t, numeric_t
+from pandas._libs.dtypes cimport (
+    iu_64_floating_obj_t,
+    iu_64_floating_t,
+    numeric_t,
+)
 from pandas._libs.missing cimport checknull
 
 
diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx
index 286a1a189db4c..c9a4b49f90037 100644
--- a/pandas/_libs/join.pyx
+++ b/pandas/_libs/join.pyx
@@ -13,7 +13,11 @@ from numpy cimport (
 cnp.import_array()
 
 from pandas._libs.algos import groupsort_indexer
-from pandas._libs.dtypes cimport numeric_object_t, numeric_t
+
+from pandas._libs.dtypes cimport (
+    numeric_object_t,
+    numeric_t,
+)
 
 
 @cython.wraparound(False)
@@ -256,7 +260,10 @@ def ffill_indexer(const intp_t[:] indexer) -> np.ndarray:
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def left_join_indexer_unique(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right):
+def left_join_indexer_unique(
+    ndarray[numeric_object_t] left,
+    ndarray[numeric_object_t] right
+):
     cdef:
         Py_ssize_t i, j, nleft, nright
         ndarray[intp_t] indexer
diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx
index 45e9da52c0663..9d3b80b321537 100644
--- a/pandas/_libs/reshape.pyx
+++ b/pandas/_libs/reshape.pyx
@@ -13,8 +13,8 @@ cimport numpy as cnp
 
 cnp.import_array()
 
-from pandas._libs.lib cimport c_is_list_like
 from pandas._libs.dtypes cimport numeric_object_t
+from pandas._libs.lib cimport c_is_list_like
 
 
 @cython.wraparound(False)

From 5dd526a136b4575acea9aff5bc7fcf75c5efe10a Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Thu, 30 Sep 2021 22:08:30 -0400
Subject: [PATCH 4/4] Add comments explaining fused types

---
 pandas/_libs/dtypes.pxd | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pandas/_libs/dtypes.pxd b/pandas/_libs/dtypes.pxd
index ad579ada7417a..f87a1525b15fd 100644
--- a/pandas/_libs/dtypes.pxd
+++ b/pandas/_libs/dtypes.pxd
@@ -15,6 +15,7 @@ from numpy cimport (
     uint64_t,
 )
 
+# All numeric types except complex
 ctypedef fused numeric_t:
     int8_t
     int16_t
@@ -29,16 +30,19 @@ ctypedef fused numeric_t:
     float32_t
     float64_t
 
+# All numeric types + object, doesn't include complex
 ctypedef fused numeric_object_t:
     numeric_t
     object
 
+# i64 + u64 + all float types
 ctypedef fused iu_64_floating_t:
     float64_t
     float32_t
     int64_t
     uint64_t
 
+# i64 + u64 + all float types + object
 ctypedef fused iu_64_floating_obj_t:
     iu_64_floating_t
     object