pandas-dev · jreback · Oct 31, 2014 · Oct 31, 2014
diff --git a/doc/source/whatsnew/v0.15.1.txt b/doc/source/whatsnew/v0.15.1.txt
@@ -183,7 +183,9 @@ Bug Fixes
 - Bug in ``cut``/``qcut`` when using ``Series`` and ``retbins=True`` (:issue:`8589`)
 - Bug in writing Categorical columns to an SQL database with ``to_sql`` (:issue:`8624`).
 - Bug in comparing ``Categorical`` of datetime raising when being compared to a scalar datetime (:issue:`8687`)
-
+- Bug in selecting from a ``Categorical`` with ``.iloc`` (:issue:`8623`)
+- Bug in ``groupby``/``transform`` with a ``Categorical`` (:issue:`8623`)
+- Bug in ``duplicated``/``drop_duplicates`` with a ``Categorical`` (:issue:`8623`)
 
 
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2732,19 +2732,20 @@ def _m8_to_i8(x):
                 return x.view(np.int64)
             return x
 
+        # if we are only duplicating on Categoricals this can be much faster
         if subset is None:
-            values = list(_m8_to_i8(self.values.T))
+            values = list(_m8_to_i8(self.get_values().T))
         else:
             if np.iterable(subset) and not isinstance(subset, compat.string_types):
                 if isinstance(subset, tuple):
                     if subset in self.columns:
-                        values = [self[subset].values]
+                        values = [self[subset].get_values()]
                     else:
-                        values = [_m8_to_i8(self[x].values) for x in subset]
+                        values = [_m8_to_i8(self[x].get_values()) for x in subset]
                 else:
-                    values = [_m8_to_i8(self[x].values) for x in subset]
+                    values = [_m8_to_i8(self[x].get_values()) for x in subset]
             else:
-                values = [self[subset].values]
+                values = [self[subset].get_values()]
 
         keys = lib.fast_zip_fillna(values)
         duplicated = lib.duplicated(keys, take_last=take_last)

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -58,6 +58,7 @@ class Block(PandasObject):
     _verify_integrity = True
     _validate_ndim = True
     _ftype = 'dense'
+    _holder = None
 
     def __init__(self, values, placement, ndim=None, fastpath=False):
         if ndim is None:
@@ -476,6 +477,14 @@ def to_native_types(self, slicer=None, na_rep='', **kwargs):
 
     def _concat_blocks(self, blocks, values):
         """ return the block concatenation """
+
+        # dispatch to a categorical to handle the concat
+        if self._holder is None:
+
+            for b in blocks:
+                if b.is_categorical:
+                    return b._concat_blocks(blocks,values)
+
         return self._holder(values[0])
 
     # block actions ####
@@ -1739,10 +1748,24 @@ def _concat_blocks(self, blocks, values):
         return the block concatenation
         """
 
-        categories = self.values.categories
-        for b in blocks:
+        # we could have object blocks and categoricals here
+        # if all of the categoricals share the same categories then combine
+        # everything, else it's a non-compat categorical
+
+        categoricals = [ b for b in blocks if b.is_categorical ]
+        objects = [ b for b in blocks if not b.is_categorical and b.is_object ]
+
+        # only object and categorical blocks can be combined here; raise otherwise
+        if len(objects) + len(categoricals) != len(blocks):
+            raise ValueError("try to combine non-object blocks and categoricals")
+
+        # validate the categories
+        categories = None
+        for b in categoricals:
+            if categories is None:
+                categories = b.values.categories
             if not categories.equals(b.values.categories):
-                raise ValueError("incompatible levels in categorical block merge")
+                raise ValueError("incompatible categories in categorical block merge")
 
         return self._holder(values[0], categories=categories)
 

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -475,7 +475,13 @@ def _ixs(self, i, axis=0):
         value : scalar (int) or Series (slice, sequence)
         """
         try:
-            return _index.get_value_at(self.values, i)
+
+            # dispatch to the values themselves if they are not an ndarray
+            values = self.values
+            if isinstance(values, np.ndarray):
+                return _index.get_value_at(values, i)
+            else:
+                return values[i]
         except IndexError:
             raise
         except:

diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
@@ -1030,6 +1030,21 @@ def test_basic(self):
         str(df.values)
         str(df)
 
+        # GH8623
+        x = pd.DataFrame([[1,'John P. Doe'],[2,'Jane Dove'],[1,'John P. Doe']],
+                         columns=['person_id','person_name'])
+        x['person_name'] = pd.Categorical(x.person_name) # doing this breaks transform
+
+        expected = x.iloc[0].person_name
+        result = x.person_name.iloc[0]
+        self.assertEqual(result,expected)
+
+        result = x.person_name[0]
+        self.assertEqual(result,expected)
+
+        result = x.person_name.loc[0]
+        self.assertEqual(result,expected)
+
     def test_creation_astype(self):
         l = ["a","b","c","a"]
         s = pd.Series(l)
@@ -1477,6 +1492,28 @@ def test_groupby(self):
         result = gb.sum()
         tm.assert_frame_equal(result, expected)
 
+        # GH 8623
+        x=pd.DataFrame([[1,'John P. Doe'],[2,'Jane Dove'],[1,'John P. Doe']],
+                       columns=['person_id','person_name'])
+        x['person_name'] = pd.Categorical(x.person_name)
+
+        g = x.groupby(['person_id'])
+        result = g.transform(lambda x:x)
+        tm.assert_frame_equal(result, x[['person_name']])
+
+        result = x.drop_duplicates('person_name')
+        expected = x.iloc[[0,1]]
+        tm.assert_frame_equal(result, expected)
+
+        def f(x):
+            return x.drop_duplicates('person_name').iloc[0]
+
+        result = g.apply(f)
+        expected = x.iloc[[0,1]].copy()
+        expected.index = Index([1,2],name='person_id')
+        expected['person_name'] = expected['person_name'].astype('object')
+        tm.assert_frame_equal(result, expected)
+
     def test_pivot_table(self):
 
         raw_cat1 = Categorical(["a","a","b","b"], categories=["a","b","z"])