Skip to content
11 changes: 11 additions & 0 deletions asv_bench/benchmarks/stat_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,23 @@ def setup(self, method, use_bottleneck):
nanops._USE_BOTTLENECK = use_bottleneck
self.df = pd.DataFrame(np.random.randn(1000, 30))
self.df2 = pd.DataFrame(np.random.randn(1000, 30))
self.df_wide = pd.DataFrame(np.random.randn(1000, 200))
self.df_wide_nans = self.df_wide.where(np.random.random((1000, 200)) < 0.9)
self.s = pd.Series(np.random.randn(1000))
self.s2 = pd.Series(np.random.randn(1000))

def time_corr(self, method, use_bottleneck):
    """Time ``DataFrame.corr`` on the 1000 x 30 frame built in ``setup``.

    ``method`` is forwarded to ``corr`` unchanged.  ``use_bottleneck`` is
    not read here; ``setup`` applies it to ``nanops._USE_BOTTLENECK``
    before the benchmark runs.
    """
    frame = self.df
    frame.corr(method=method)

def time_corr_wide(self, method, use_bottleneck):
    """Time ``DataFrame.corr`` on the wide (1000 x 200) frame from ``setup``.

    ``method`` is forwarded to ``corr`` unchanged; ``use_bottleneck`` is
    consumed by ``setup`` and is not read in this body.
    """
    wide_frame = self.df_wide
    wide_frame.corr(method=method)

def time_corr_wide_nans(self, method, use_bottleneck):
    """Time ``DataFrame.corr`` on the wide frame that contains missing values.

    ``setup`` derives ``df_wide_nans`` from ``df_wide`` via ``where`` with a
    random mask (``np.random.random(...) < 0.9``), so entries failing the
    mask are replaced by NaN.  ``method`` is forwarded to ``corr``;
    ``use_bottleneck`` is not read in this body.
    """
    sparse_frame = self.df_wide_nans
    sparse_frame.corr(method=method)

def peakmem_corr_wide(self, method, use_bottleneck):
    """Run ``DataFrame.corr`` on the wide (1000 x 200) frame from ``setup``.

    The body is the same call as ``time_corr_wide``; the ``peakmem_`` name
    prefix is what asks the benchmark runner to record peak memory rather
    than wall time.  ``method`` is forwarded to ``corr``;
    ``use_bottleneck`` is not read in this body.
    """
    wide_frame = self.df_wide
    wide_frame.corr(method=method)

def time_corr_series(self, method, use_bottleneck):
    """Time ``Series.corr`` between the two 1000-element series from ``setup``.

    ``self.s2`` is passed positionally as the other series and ``method``
    is forwarded as a keyword.  ``use_bottleneck`` is not read in this
    body.
    """
    left, right = self.s, self.s2
    left.corr(right, method=method)

Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ Performance improvements
- Performance improvement in indexing with a non-unique :class:`IntervalIndex` (:issue:`27489`)
- Performance improvement in :attr:`MultiIndex.is_monotonic` (:issue:`27495`)
- Performance improvement in :func:`cut` when ``bins`` is an :class:`IntervalIndex` (:issue:`27668`)
- Performance improvement in :meth:`DataFrame.corr` when ``method`` is ``"spearman"`` (:issue:`28139`)
- Performance improvement in :meth:`DataFrame.replace` when provided a list of values to replace (:issue:`28099`)


.. _whatsnew_1000.bug_fixes:

Bug fixes
Expand Down
20 changes: 16 additions & 4 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1):
cdef:
Py_ssize_t i, j, xi, yi, N, K
ndarray[float64_t, ndim=2] result
ndarray[float64_t, ndim=2] ranked_mat
ndarray[float64_t, ndim=1] maskedx
ndarray[float64_t, ndim=1] maskedy
ndarray[uint8_t, ndim=2] mask
Expand All @@ -307,10 +308,18 @@ def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1):
result = np.empty((K, K), dtype=np.float64)
mask = np.isfinite(mat).view(np.uint8)

ranked_mat = np.empty((N, K), dtype=np.float64)

for i in range(K):
ranked_mat[:, i] = rank_1d_float64(mat[:, i])

for xi in range(K):
for yi in range(xi + 1):
nobs = 0
# Keep track of whether we need to recompute ranks
all_ranks = True
for i in range(N):
all_ranks &= not (mask[i, xi] ^ mask[i, yi])
if mask[i, xi] and mask[i, yi]:
nobs += 1

Expand All @@ -320,13 +329,16 @@ def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1):
maskedx = np.empty(nobs, dtype=np.float64)
maskedy = np.empty(nobs, dtype=np.float64)
j = 0

for i in range(N):
if mask[i, xi] and mask[i, yi]:
maskedx[j] = mat[i, xi]
maskedy[j] = mat[i, yi]
maskedx[j] = ranked_mat[i, xi]
maskedy[j] = ranked_mat[i, yi]
j += 1
maskedx = rank_1d_float64(maskedx)
maskedy = rank_1d_float64(maskedy)

if not all_ranks:
maskedx = rank_1d_float64(maskedx)
maskedy = rank_1d_float64(maskedy)

mean = (nobs + 1) / 2.

Expand Down