Skip to content

BUG: Bug in downcasting inference with empty arrays (GH6733) #6737

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 29, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ Bug Fixes
- Bug in resample with extra bins when using an evenly divisible frequency (:issue:`4076`)
- Bug in consistency of groupby aggregation when passing a custom function (:issue:`6715`)
- Bug in resample when ``how=None`` resample freq is the same as the axis frequency (:issue:`5955`)
- Bug in downcasting inference with empty arrays (:issue:`6733`)

pandas 0.13.1
-------------
Expand Down
10 changes: 7 additions & 3 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1084,7 +1084,7 @@ def _possibly_downcast_to_dtype(result, dtype):
or could be an astype of float64->float32
"""

if np.isscalar(result) or not len(result):
if np.isscalar(result):
return result

trans = lambda x: x
Expand Down Expand Up @@ -1114,15 +1114,19 @@ def _possibly_downcast_to_dtype(result, dtype):

try:

# don't allow upcasts here
# don't allow upcasts here (except if empty)
if dtype.kind == result.dtype.kind:
if result.dtype.itemsize <= dtype.itemsize:
if result.dtype.itemsize <= dtype.itemsize and np.prod(result.shape):
return result

if issubclass(dtype.type, np.floating):
return result.astype(dtype)
elif dtype == np.bool_ or issubclass(dtype.type, np.integer):

# if we don't have any elements, just astype it
if not np.prod(result.shape):
return trans(result).astype(dtype)

# do a test on the first element, if it fails then we are done
r = result.ravel()
arr = np.array([r[0]])
Expand Down
12 changes: 9 additions & 3 deletions pandas/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,12 @@ def test_downcast_conv():
result = com._possibly_downcast_to_dtype(arr,'infer')
tm.assert_almost_equal(result, expected)

# empties
for dtype in [np.int32,np.float64,np.float32,np.bool_,np.int64,object]:
arr = np.array([],dtype=dtype)
result = com._possibly_downcast_to_dtype(arr,'int64')
tm.assert_almost_equal(result, np.array([],dtype=np.int64))
assert result.dtype == np.int64

def test_array_equivalent():
assert array_equivalent(np.array([np.nan, np.nan]),
Expand All @@ -182,10 +188,10 @@ def test_array_equivalent():
np.array([np.nan, 2, np.nan]))
assert not array_equivalent(np.array(['a', 'b', 'c', 'd']), np.array(['e', 'e']))
assert array_equivalent(Float64Index([0, np.nan]), Float64Index([0, np.nan]))
assert not array_equivalent(Float64Index([0, np.nan]), Float64Index([1, np.nan]))
assert array_equivalent(DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan]))
assert not array_equivalent(Float64Index([0, np.nan]), Float64Index([1, np.nan]))
assert array_equivalent(DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan]))
assert not array_equivalent(DatetimeIndex([0, np.nan]), DatetimeIndex([1, np.nan]))

def test_datetimeindex_from_empty_datetime64_array():
for unit in [ 'ms', 'us', 'ns' ]:
idx = DatetimeIndex(np.array([], dtype='datetime64[%s]' % unit))
Expand Down
22 changes: 15 additions & 7 deletions pandas/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2237,6 +2237,14 @@ def test_groupby_aggregation_mixed_dtype(self):
result = g[['v1','v2']].mean()
assert_frame_equal(result,expected)


def test_groupby_dtype_inference_empty(self):
    # GH 6733: dtype inference when aggregating a completely empty frame.
    # 'x' is an empty column used as the group key; 'range' is an empty
    # int64 column (np.arange(0)).
    df = DataFrame({'x': [], 'range': np.arange(0)})
    result = df.groupby('x').first()
    # The aggregated 'range' column must come back empty and still int64 —
    # i.e. downcasting inference must not upcast on empty input.
    expected = DataFrame({'range' : Series([],index=Index([],name='x'),dtype='int64') })
    # by_blocks=True — presumably compares block-by-block so the dtype
    # mismatch would be caught; NOTE(review): confirm against the
    # assert_frame_equal signature in this pandas version.
    assert_frame_equal(result,expected,by_blocks=True)

def test_groupby_list_infer_array_like(self):
result = self.df.groupby(list(self.df['A'])).mean()
expected = self.df.groupby(self.df['A']).mean()
Expand Down Expand Up @@ -3862,20 +3870,20 @@ def test_lexsort_indexer(self):
result = _lexsort_indexer(keys, orders=True, na_position='last')
expected = list(range(5, 105)) + list(range(5)) + list(range(105, 110))
assert_equal(result, expected)

# orders=True, na_position='first'
result = _lexsort_indexer(keys, orders=True, na_position='first')
expected = list(range(5)) + list(range(105, 110)) + list(range(5, 105))
assert_equal(result, expected)

# orders=False, na_position='last'
result = _lexsort_indexer(keys, orders=False, na_position='last')
expected = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110))
expected = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110))
assert_equal(result, expected)

# orders=False, na_position='first'
result = _lexsort_indexer(keys, orders=False, na_position='first')
expected = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1))
expected = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1))
assert_equal(result, expected)

def test_nargsort(self):
Expand All @@ -3887,7 +3895,7 @@ def test_nargsort(self):
try:
# GH 2785; due to a regression in NumPy1.6.2
np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i'))
np.argsort(items2, kind='mergesort')
np.argsort(items2, kind='mergesort')
except TypeError as err:
raise nose.SkipTest('requested sort not available for type')

Expand All @@ -3898,7 +3906,7 @@ def test_nargsort(self):
# because quick and merge sort fall over to insertion sort for small
# arrays."""


# mergesort, ascending=True, na_position='last'
result = _nargsort(
items, kind='mergesort', ascending=True, na_position='last')
Expand Down