Skip to content

Commit 5c211bf

Browse files
committed
Merge pull request #6737 from jreback/infer
BUG: Bug in downcasting inference with empty arrays (GH6733)
2 parents bbab950 + 92e8ac8 commit 5c211bf

File tree

4 files changed

+32
-13
lines changed

4 files changed

+32
-13
lines changed

doc/source/release.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,7 @@ Bug Fixes
292292
- Bug in resample with extra bins when using an evenly divisible frequency (:issue:`4076`)
293293
- Bug in consistency of groupby aggregation when passing a custom function (:issue:`6715`)
294294
- Bug in resample when ``how=None`` and the resample freq is the same as the axis frequency (:issue:`5955`)
295+
- Bug in downcasting inference with empty arrays (:issue:`6733`)
295296

296297
pandas 0.13.1
297298
-------------

pandas/core/common.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1084,7 +1084,7 @@ def _possibly_downcast_to_dtype(result, dtype):
10841084
or could be an astype of float64->float32
10851085
"""
10861086

1087-
if np.isscalar(result) or not len(result):
1087+
if np.isscalar(result):
10881088
return result
10891089

10901090
trans = lambda x: x
@@ -1114,15 +1114,19 @@ def _possibly_downcast_to_dtype(result, dtype):
11141114

11151115
try:
11161116

1117-
# don't allow upcasts here
1117+
# don't allow upcasts here (except if empty)
11181118
if dtype.kind == result.dtype.kind:
1119-
if result.dtype.itemsize <= dtype.itemsize:
1119+
if result.dtype.itemsize <= dtype.itemsize and np.prod(result.shape):
11201120
return result
11211121

11221122
if issubclass(dtype.type, np.floating):
11231123
return result.astype(dtype)
11241124
elif dtype == np.bool_ or issubclass(dtype.type, np.integer):
11251125

1126+
# if we don't have any elements, just astype it
1127+
if not np.prod(result.shape):
1128+
return trans(result).astype(dtype)
1129+
11261130
# do a test on the first element, if it fails then we are done
11271131
r = result.ravel()
11281132
arr = np.array([r[0]])

pandas/tests/test_common.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,12 @@ def test_downcast_conv():
166166
result = com._possibly_downcast_to_dtype(arr,'infer')
167167
tm.assert_almost_equal(result, expected)
168168

169+
# empties
170+
for dtype in [np.int32,np.float64,np.float32,np.bool_,np.int64,object]:
171+
arr = np.array([],dtype=dtype)
172+
result = com._possibly_downcast_to_dtype(arr,'int64')
173+
tm.assert_almost_equal(result, np.array([],dtype=np.int64))
174+
assert result.dtype == np.int64
169175

170176
def test_array_equivalent():
171177
assert array_equivalent(np.array([np.nan, np.nan]),
@@ -182,10 +188,10 @@ def test_array_equivalent():
182188
np.array([np.nan, 2, np.nan]))
183189
assert not array_equivalent(np.array(['a', 'b', 'c', 'd']), np.array(['e', 'e']))
184190
assert array_equivalent(Float64Index([0, np.nan]), Float64Index([0, np.nan]))
185-
assert not array_equivalent(Float64Index([0, np.nan]), Float64Index([1, np.nan]))
186-
assert array_equivalent(DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan]))
191+
assert not array_equivalent(Float64Index([0, np.nan]), Float64Index([1, np.nan]))
192+
assert array_equivalent(DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan]))
187193
assert not array_equivalent(DatetimeIndex([0, np.nan]), DatetimeIndex([1, np.nan]))
188-
194+
189195
def test_datetimeindex_from_empty_datetime64_array():
190196
for unit in [ 'ms', 'us', 'ns' ]:
191197
idx = DatetimeIndex(np.array([], dtype='datetime64[%s]' % unit))

pandas/tests/test_groupby.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2237,6 +2237,14 @@ def test_groupby_aggregation_mixed_dtype(self):
22372237
result = g[['v1','v2']].mean()
22382238
assert_frame_equal(result,expected)
22392239

2240+
2241+
def test_groupby_dtype_inference_empty(self):
2242+
# GH 6733
2243+
df = DataFrame({'x': [], 'range': np.arange(0)})
2244+
result = df.groupby('x').first()
2245+
expected = DataFrame({'range' : Series([],index=Index([],name='x'),dtype='int64') })
2246+
assert_frame_equal(result,expected,by_blocks=True)
2247+
22402248
def test_groupby_list_infer_array_like(self):
22412249
result = self.df.groupby(list(self.df['A'])).mean()
22422250
expected = self.df.groupby(self.df['A']).mean()
@@ -3862,20 +3870,20 @@ def test_lexsort_indexer(self):
38623870
result = _lexsort_indexer(keys, orders=True, na_position='last')
38633871
expected = list(range(5, 105)) + list(range(5)) + list(range(105, 110))
38643872
assert_equal(result, expected)
3865-
3873+
38663874
# orders=True, na_position='first'
38673875
result = _lexsort_indexer(keys, orders=True, na_position='first')
38683876
expected = list(range(5)) + list(range(105, 110)) + list(range(5, 105))
38693877
assert_equal(result, expected)
3870-
3878+
38713879
# orders=False, na_position='last'
38723880
result = _lexsort_indexer(keys, orders=False, na_position='last')
3873-
expected = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110))
3881+
expected = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110))
38743882
assert_equal(result, expected)
3875-
3883+
38763884
# orders=False, na_position='first'
38773885
result = _lexsort_indexer(keys, orders=False, na_position='first')
3878-
expected = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1))
3886+
expected = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1))
38793887
assert_equal(result, expected)
38803888

38813889
def test_nargsort(self):
@@ -3887,7 +3895,7 @@ def test_nargsort(self):
38873895
try:
38883896
# GH 2785; due to a regression in NumPy 1.6.2
38893897
np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i'))
3890-
np.argsort(items2, kind='mergesort')
3898+
np.argsort(items2, kind='mergesort')
38913899
except TypeError as err:
38923900
raise nose.SkipTest('requested sort not available for type')
38933901

@@ -3898,7 +3906,7 @@ def test_nargsort(self):
38983906
# because quick and merge sort fall over to insertion sort for small
38993907
# arrays."""
39003908

3901-
3909+
39023910
# mergesort, ascending=True, na_position='last'
39033911
result = _nargsort(
39043912
items, kind='mergesort', ascending=True, na_position='last')

0 commit comments

Comments
 (0)