Skip to content

Commit f6b11fe

Browse files
BUG: Fix Kendall DataFrame.corr when the DataFrame has both numeric and boolean columns
1 parent 49a15b1 commit f6b11fe

File tree

3 files changed

+22
-3
lines changed

3 files changed

+22
-3
lines changed

doc/source/whatsnew/v0.17.1.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,6 @@ Bug Fixes
123123

124124
- Bug in ``DataFrame.to_dict()`` produces a ``np.datetime64`` object instead of ``Timestamp`` when only datetime is present in data (:issue:`11327`)
125125

126-
126+
- Bug in ``DataFrame.corr()`` raising an exception when computing the Kendall correlation for DataFrames with both boolean and non-boolean columns (:issue:`11560`)
127127

128128
- Bug in the link-time error caused by C ``inline`` functions on FreeBSD 10+ (with ``clang``) (:issue:`10510`)

pandas/core/frame.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4411,16 +4411,21 @@ def corr(self, method='pearson', min_periods=1):
44114411
else:
44124412
if min_periods is None:
44134413
min_periods = 1
4414-
mat = mat.T
4414+
mat = com._ensure_float64(mat).T
44154415
corrf = nanops.get_corr_func(method)
44164416
K = len(cols)
44174417
correl = np.empty((K, K), dtype=float)
44184418
mask = np.isfinite(mat)
44194419
for i, ac in enumerate(mat):
44204420
for j, bc in enumerate(mat):
4421+
if i > j:
4422+
continue
4423+
44214424
valid = mask[i] & mask[j]
44224425
if valid.sum() < min_periods:
44234426
c = NA
4427+
elif i == j:
4428+
c = 1.
44244429
elif not valid.all():
44254430
c = corrf(ac[valid], bc[valid])
44264431
else:

pandas/tests/test_frame.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8002,12 +8002,14 @@ def test_corr_nooverlap(self):
80028002
# nothing in common
80038003
for meth in ['pearson', 'kendall', 'spearman']:
80048004
df = DataFrame({'A': [1, 1.5, 1, np.nan, np.nan, np.nan],
8005-
'B': [np.nan, np.nan, np.nan, 1, 1.5, 1]})
8005+
'B': [np.nan, np.nan, np.nan, 1, 1.5, 1],
8006+
'C': [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]})
80068007
rs = df.corr(meth)
80078008
self.assertTrue(isnull(rs.ix['A', 'B']))
80088009
self.assertTrue(isnull(rs.ix['B', 'A']))
80098010
self.assertEqual(rs.ix['A', 'A'], 1)
80108011
self.assertEqual(rs.ix['B', 'B'], 1)
8012+
self.assertTrue(isnull(rs.ix['C', 'C']))
80118013

80128014
def test_corr_constant(self):
80138015
tm._skip_if_no_scipy()
@@ -8028,6 +8030,18 @@ def test_corr_int(self):
80288030
df3.cov()
80298031
df3.corr()
80308032

8033+
def test_corr_int_and_boolean(self):
8034+
tm._skip_if_no_scipy()
8035+
8036+
# when dtypes of pandas series are different
8037+
# then ndarray will have dtype=object,
8038+
# so it needs to be handled properly
8039+
df = DataFrame({"a": [True, False], "b": [1, 0]})
8040+
8041+
expected = DataFrame(np.ones((2, 2)), index=['a', 'b'], columns=['a', 'b'])
8042+
for meth in ['pearson', 'kendall', 'spearman']:
8043+
assert_frame_equal(df.corr(meth), expected)
8044+
80318045
def test_cov(self):
80328046
# min_periods no NAs (corner case)
80338047
expected = self.frame.cov()

0 commit comments

Comments
 (0)