pandas-dev · WillAyd · Sep 7, 2019 · Aug 26, 2019 · Aug 26, 2019 · Aug 26, 2019
diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py
@@ -113,12 +113,23 @@ def setup(self, method, use_bottleneck):
             nanops._USE_BOTTLENECK = use_bottleneck
         self.df = pd.DataFrame(np.random.randn(1000, 30))
         self.df2 = pd.DataFrame(np.random.randn(1000, 30))
+        self.df_wide = pd.DataFrame(np.random.randn(1000, 200))
+        self.df_wide_nans = self.df_wide.where(np.random.random((1000, 200)) < 0.9)
         self.s = pd.Series(np.random.randn(1000))
         self.s2 = pd.Series(np.random.randn(1000))
 
     def time_corr(self, method, use_bottleneck):
         self.df.corr(method=method)
 
+    def time_corr_wide(self, method, use_bottleneck):
+        self.df_wide.corr(method=method)
+
+    def time_corr_wide_nans(self, method, use_bottleneck):
+        self.df_wide_nans.corr(method=method)
+
+    def peakmem_corr_wide(self, method, use_bottleneck):
+        self.df_wide.corr(method=method)
+
     def time_corr_series(self, method, use_bottleneck):
         self.s.corr(self.s2, method=method)
 

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -75,9 +75,9 @@ Performance improvements
 - Performance improvement in indexing with a non-unique :class:`IntervalIndex` (:issue:`27489`)
 - Performance improvement in `MultiIndex.is_monotonic` (:issue:`27495`)
 - Performance improvement in :func:`cut` when ``bins`` is an :class:`IntervalIndex` (:issue:`27668`)
+- Performance improvement in :meth:`DataFrame.corr` when ``method`` is ``"spearman"`` (:issue:`28139`)
 - Performance improvement in :meth:`DataFrame.replace` when provided a list of values to replace (:issue:`28099`)
 
-
 .. _whatsnew_1000.bug_fixes:
 
 Bug fixes

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
@@ -296,6 +296,7 @@ def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1):
     cdef:
         Py_ssize_t i, j, xi, yi, N, K
         ndarray[float64_t, ndim=2] result
+        ndarray[float64_t, ndim=2] ranked_mat
         ndarray[float64_t, ndim=1] maskedx
         ndarray[float64_t, ndim=1] maskedy
         ndarray[uint8_t, ndim=2] mask
@@ -307,10 +308,18 @@ def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1):
     result = np.empty((K, K), dtype=np.float64)
     mask = np.isfinite(mat).view(np.uint8)
 
+    ranked_mat = np.empty((N, K), dtype=np.float64)
+
+    for i in range(K):
+        ranked_mat[:, i] = rank_1d_float64(mat[:, i])
+
     for xi in range(K):
         for yi in range(xi + 1):
             nobs = 0
+            # Keep track of whether we need to recompute ranks
+            all_ranks = True
             for i in range(N):
+                all_ranks &= not (mask[i, xi] ^ mask[i, yi])
                 if mask[i, xi] and mask[i, yi]:
                     nobs += 1
 
@@ -320,13 +329,16 @@ def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1):
                 maskedx = np.empty(nobs, dtype=np.float64)
                 maskedy = np.empty(nobs, dtype=np.float64)
                 j = 0
+
                 for i in range(N):
                     if mask[i, xi] and mask[i, yi]:
-                        maskedx[j] = mat[i, xi]
-                        maskedy[j] = mat[i, yi]
+                        maskedx[j] = ranked_mat[i, xi]
+                        maskedy[j] = ranked_mat[i, yi]
                         j += 1
-                maskedx = rank_1d_float64(maskedx)
-                maskedy = rank_1d_float64(maskedy)
+
+                if not all_ranks:
+                    maskedx = rank_1d_float64(maskedx)
+                    maskedy = rank_1d_float64(maskedy)
 
                 mean = (nobs + 1) / 2.