Skip to content

BUG: Bug in downcasting inference with empty arrays (GH6733) #6737

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 29, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ Bug Fixes
- Bug in resample with extra bins when using an evenly divisible frequency (:issue:`4076`)
- Bug in consistency of groupby aggregation when passing a custom function (:issue:`6715`)
- Bug in resample when ``how=None`` resample freq is the same as the axis frequency (:issue:`5955`)
- Bug in downcasting inference with empty arrays (:issue:`6733`)

pandas 0.13.1
-------------
Expand Down
10 changes: 7 additions & 3 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1084,7 +1084,7 @@ def _possibly_downcast_to_dtype(result, dtype):
or could be an astype of float64->float32
"""

if np.isscalar(result) or not len(result):
if np.isscalar(result):
return result

trans = lambda x: x
Expand Down Expand Up @@ -1114,15 +1114,19 @@ def _possibly_downcast_to_dtype(result, dtype):

try:

# don't allow upcasts here
# don't allow upcasts here (except if empty)
if dtype.kind == result.dtype.kind:
if result.dtype.itemsize <= dtype.itemsize:
if result.dtype.itemsize <= dtype.itemsize and np.prod(result.shape):
return result

if issubclass(dtype.type, np.floating):
return result.astype(dtype)
elif dtype == np.bool_ or issubclass(dtype.type, np.integer):

# if we don't have any elements, just astype it
if not np.prod(result.shape):
return trans(result).astype(dtype)

# do a test on the first element, if it fails then we are done
r = result.ravel()
arr = np.array([r[0]])
Expand Down
12 changes: 9 additions & 3 deletions pandas/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,12 @@ def test_downcast_conv():
result = com._possibly_downcast_to_dtype(arr,'infer')
tm.assert_almost_equal(result, expected)

# empties
for dtype in [np.int32,np.float64,np.float32,np.bool_,np.int64,object]:
arr = np.array([],dtype=dtype)
result = com._possibly_downcast_to_dtype(arr,'int64')
tm.assert_almost_equal(result, np.array([],dtype=np.int64))
assert result.dtype == np.int64

def test_array_equivalent():
assert array_equivalent(np.array([np.nan, np.nan]),
Expand All @@ -182,10 +188,10 @@ def test_array_equivalent():
np.array([np.nan, 2, np.nan]))
assert not array_equivalent(np.array(['a', 'b', 'c', 'd']), np.array(['e', 'e']))
assert array_equivalent(Float64Index([0, np.nan]), Float64Index([0, np.nan]))
assert not array_equivalent(Float64Index([0, np.nan]), Float64Index([1, np.nan]))
assert array_equivalent(DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan]))
assert not array_equivalent(Float64Index([0, np.nan]), Float64Index([1, np.nan]))
assert array_equivalent(DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan]))
assert not array_equivalent(DatetimeIndex([0, np.nan]), DatetimeIndex([1, np.nan]))

def test_datetimeindex_from_empty_datetime64_array():
for unit in [ 'ms', 'us', 'ns' ]:
idx = DatetimeIndex(np.array([], dtype='datetime64[%s]' % unit))
Expand Down
22 changes: 15 additions & 7 deletions pandas/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2237,6 +2237,14 @@ def test_groupby_aggregation_mixed_dtype(self):
result = g[['v1','v2']].mean()
assert_frame_equal(result,expected)


def test_groupby_dtype_inference_empty(self):
    # GH 6733: dtype inference when aggregating a completely empty frame.
    # 'x' is an empty column used as the group key; 'range' is an empty
    # int64 column (np.arange(0)).
    df = DataFrame({'x': [], 'range': np.arange(0)})
    result = df.groupby('x').first()
    # The aggregated 'range' column must come back empty and still int64 —
    # i.e. downcasting inference must not upcast on empty input.
    expected = DataFrame({'range' : Series([],index=Index([],name='x'),dtype='int64') })
    # by_blocks=True — presumably compares block-by-block so the dtype
    # mismatch would be caught; NOTE(review): confirm against the
    # assert_frame_equal signature in this pandas version.
    assert_frame_equal(result,expected,by_blocks=True)

def test_groupby_list_infer_array_like(self):
result = self.df.groupby(list(self.df['A'])).mean()
expected = self.df.groupby(self.df['A']).mean()
Expand Down Expand Up @@ -3862,20 +3870,20 @@ def test_lexsort_indexer(self):
result = _lexsort_indexer(keys, orders=True, na_position='last')
expected = list(range(5, 105)) + list(range(5)) + list(range(105, 110))
assert_equal(result, expected)

# orders=True, na_position='first'
result = _lexsort_indexer(keys, orders=True, na_position='first')
expected = list(range(5)) + list(range(105, 110)) + list(range(5, 105))
assert_equal(result, expected)

# orders=False, na_position='last'
result = _lexsort_indexer(keys, orders=False, na_position='last')
expected = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110))
expected = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110))
assert_equal(result, expected)

# orders=False, na_position='first'
result = _lexsort_indexer(keys, orders=False, na_position='first')
expected = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1))
expected = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1))
assert_equal(result, expected)

def test_nargsort(self):
Expand All @@ -3887,7 +3895,7 @@ def test_nargsort(self):
try:
# GH 2785; due to a regression in NumPy1.6.2
np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i'))
np.argsort(items2, kind='mergesort')
np.argsort(items2, kind='mergesort')
except TypeError as err:
raise nose.SkipTest('requested sort not available for type')

Expand All @@ -3898,7 +3906,7 @@ def test_nargsort(self):
# because quick and merge sort fall over to insertion sort for small
# arrays."""


# mergesort, ascending=True, na_position='last'
result = _nargsort(
items, kind='mergesort', ascending=True, na_position='last')
Expand Down