pandas-dev · GYHHAHA · Oct 10, 2022 · Oct 10, 2022 · Oct 10, 2022 · Oct 10, 2022
diff --git a/doc/source/whatsnew/v1.5.1.rst b/doc/source/whatsnew/v1.5.1.rst
@@ -86,6 +86,7 @@ Fixed regressions
 - Fixed regression in :meth:`DataFrame.apply` when passing non-zero ``axis`` via keyword argument (:issue:`48656`)
 - Fixed regression in :meth:`Series.groupby` and :meth:`DataFrame.groupby` when the grouper is a nullable data type (e.g. :class:`Int64`) or a PyArrow-backed string array, contains null values, and ``dropna=False`` (:issue:`48794`)
 - Fixed regression in :class:`ExcelWriter` where the ``book`` attribute could no longer be set; however setting this attribute is now deprecated and this ability will be removed in a future version of pandas (:issue:`48780`)
+- Fixed regression in :meth:`DataFrame.corrwith` when computing correlation on tied data with ``method="spearman"`` (:issue:`48826`)
 
 .. ---------------------------------------------------------------------------
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -10593,23 +10593,30 @@ def corrwith(
                 corrs = {}
                 if numeric_only:
                     cols = self.select_dtypes(include=np.number).columns
-                    ndf = self[cols].values.transpose()
                 else:
                     cols = self.columns
-                    ndf = self.values.transpose()
                 k = other.values
+                k_mask = ~other.isna()
+                if isinstance(k, BaseMaskedArray):
+                    k = k._data
                 if method == "pearson":
-                    for i, r in enumerate(ndf):
-                        nonnull_mask = ~np.isnan(r) & ~np.isnan(k)
-                        corrs[cols[i]] = np.corrcoef(r[nonnull_mask], k[nonnull_mask])[
-                            0, 1
-                        ]
+                    for col in cols:
+                        val = self[col].values
+                        nonnull_mask = ~self[col].isna() & k_mask
+                        if isinstance(val, BaseMaskedArray):
+                            val = val._data
+                        corrs[col] = np.corrcoef(
+                            val[nonnull_mask], k[nonnull_mask]
+                        )[0, 1]
                 else:
-                    for i, r in enumerate(ndf):
-                        nonnull_mask = ~np.isnan(r) & ~np.isnan(k)
-                        corrs[cols[i]] = np.corrcoef(
-                            r[nonnull_mask].argsort().argsort(),
-                            k[nonnull_mask].argsort().argsort(),
+                    for col in cols:
+                        val = self[col].values
+                        nonnull_mask = ~self[col].isna() & k_mask
+                        if isinstance(val, BaseMaskedArray):
+                            val = val._data
+                        corrs[col] = np.corrcoef(
+                            libalgos.rank_1d(val[nonnull_mask]),
+                            libalgos.rank_1d(k[nonnull_mask]),
                         )[0, 1]
                 return Series(corrs)
             else:

diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
@@ -355,7 +355,10 @@ def test_corrwith_mixed_dtypes(self, numeric_only):
             expected = Series(data=corrs, index=["a", "b"])
             tm.assert_series_equal(result, expected)
         else:
-            with pytest.raises(TypeError, match="not supported for the input types"):
+            with pytest.raises(
+                TypeError,
+                match=r"unsupported operand type\(s\) for /: 'str' and 'int'",
+            ):
                 df.corrwith(s, numeric_only=numeric_only)
 
     def test_corrwith_index_intersection(self):
@@ -406,3 +409,86 @@ def test_corrwith_kendall(self):
         result = df.corrwith(df**2, method="kendall")
         expected = Series(np.ones(len(result)))
         tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "numeric_only, ser, expected",
+        [
+            (
+                True,
+                Series([0, 1, 1, 0]),
+                Series([0.0] * 3 + [1.0] * 4, index=list("ABCDEFH")),
+            ),
+            (
+                True,
+                Series([0.0, 1.0, 1.0, 0.0]),
+                Series([0.0] * 3 + [1.0] * 4, index=list("ABCDEFH")),
+            ),
+            (
+                True,
+                Series([False, True, True, False]),
+                Series([0.0] * 3 + [1.0] * 4, index=list("ABCDEFH")),
+            ),
+            (
+                False,
+                Series([0, 1, 1, 0]),
+                Series([0.0] * 3 + [1.0] * 5, index=list("ABCDEFGH")),
+            ),
+            (
+                False,
+                Series([0.0, 1.0, 1.0, 0.0]),
+                Series([0.0] * 3 + [1.0] * 5, index=list("ABCDEFGH")),
+            ),
+            (
+                False,
+                Series([False, True, True, False]),
+                Series([0.0] * 3 + [1.0] * 5, index=list("ABCDEFGH")),
+            ),
+            (
+                True,
+                Series([0, pd.NA, 1, 0], dtype="Int64"),
+                Series([0.0] * 3 + [1.0] * 4, index=list("ABCDEFH")),
+            ),
+            (
+                True,
+                Series([0.0, pd.NA, 1.0, 0.0], dtype="Float64"),
+                Series([0.0] * 3 + [1.0] * 4, index=list("ABCDEFH")),
+            ),
+            (
+                True,
+                Series([False, pd.NA, True, False], dtype="boolean"),
+                Series([0.0] * 3 + [1.0] * 4, index=list("ABCDEFH")),
+            ),
+            (
+                False,
+                Series([0, pd.NA, 1, 0], dtype="Int64"),
+                Series([0.0] * 3 + [1.0] * 5, index=list("ABCDEFGH")),
+            ),
+            (
+                False,
+                Series([0.0, pd.NA, 1.0, 0.0], dtype="Float64"),
+                Series([0.0] * 3 + [1.0] * 5, index=list("ABCDEFGH")),
+            ),
+            (
+                False,
+                Series([False, pd.NA, True, False], dtype="boolean"),
+                Series([0.0] * 3 + [1.0] * 5, index=list("ABCDEFGH")),
+            ),
+        ],
+    )
+    def test_corrwith_spearman_with_tied_data(self, ser, numeric_only, expected):
+        # GH#48826: spearman corrwith on tied data, for numpy and masked dtypes
+        df = DataFrame(
+            {
+                "A": [2, 5, 8, 9],
+                "B": [2, np.nan, 8, 9],
+                "C": [2, np.nan, 8, 9],
+                "D": [0, 1, 1, 0],
+                "E": [0, np.nan, 1, 0],
+                "F": [0, np.nan, 1, 0],
+                "G": [False, True, True, False],
+                "H": [False, pd.NA, True, False],
+            },
+        ).astype({"C": "Int64", "F": "Float64", "H": "boolean"})
+        # Correlate against the parametrized `ser` so each dtype case is exercised
+        result = df.corrwith(ser, method="spearman", numeric_only=numeric_only)
+        tm.assert_series_equal(result, expected)