From 238e30e2df55632b708faf45d67a0c6fc97f822d Mon Sep 17 00:00:00 2001
From: Evan Wright <evanpw@gmail.com>
Date: Tue, 28 Apr 2015 09:01:17 -0400
Subject: [PATCH 1/2] BUG: null group spills into final group when grouping on
 a categorical

---
 doc/source/whatsnew/v0.16.1.txt  |  2 +-
 pandas/lib.pyx                   | 20 ++++++++++++--------
 pandas/tests/test_categorical.py |  7 +++++++
 3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt
index 2ddf77d99d51d..d00ce29fbfe92 100755
--- a/doc/source/whatsnew/v0.16.1.txt
+++ b/doc/source/whatsnew/v0.16.1.txt
@@ -218,7 +218,7 @@ Bug Fixes
 - Bug in csv parser causing lines with initial whitespace plus one non-space character to be skipped. (:issue:`9710`)
 
 
-
+- Bug in ``groupby`` causing rows with a null group key to spill into the final group when grouping by a ``Categorical`` (:issue:`9603`)
 
 
 - Bug in invalid attribute access on a ``TimedeltaIndex`` incorrectly raised ``ValueError`` instead of ``AttributeError`` (:issue:`9680`)
diff --git a/pandas/lib.pyx b/pandas/lib.pyx
index 0d53b19425c2f..de966d6e03ee2 100644
--- a/pandas/lib.pyx
+++ b/pandas/lib.pyx
@@ -1306,9 +1306,10 @@ def duplicated(ndarray[object] values, take_last=False):
 
 def generate_slices(ndarray[int64_t] labels, Py_ssize_t ngroups):
     cdef:
-        Py_ssize_t i, group_size, n, lab, start
+        Py_ssize_t i, group_size, n, start
+        int64_t lab
         object slobj
-        ndarray[int64_t] starts
+        ndarray[int64_t] starts, ends
 
     n = len(labels)
 
@@ -1318,13 +1319,16 @@ def generate_slices(ndarray[int64_t] labels, Py_ssize_t ngroups):
     start = 0
     group_size = 0
     for i in range(n):
-        group_size += 1
         lab = labels[i]
-        if i == n - 1 or lab != labels[i + 1]:
-            starts[lab] = start
-            ends[lab] = start + group_size
-            start += group_size
-            group_size = 0
+        if lab < 0:
+            start += 1
+        else:
+            group_size += 1
+            if i == n - 1 or lab != labels[i + 1]:
+                starts[lab] = start
+                ends[lab] = start + group_size
+                start += group_size
+                group_size = 0
 
     return starts, ends
 
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 66e411d1eaddb..5a5401c8da3ca 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -1841,6 +1841,13 @@ def f(x):
         tm.assert_frame_equal(df.groupby(c).transform(sum), df[['a']])
         tm.assert_frame_equal(df.groupby(c).transform(lambda xs: np.sum(xs)), df[['a']])
 
+        # GH 9603: rows with a null category must not spill into the last group
+        df = pd.DataFrame({'a': [1, 0, 0, 0]})
+        c = pd.cut(df.a, [0, 1, 2, 3, 4])
+        result = df.groupby(c).apply(len)
+        expected = pd.Series([1, 0, 0, 0], index=c.values.categories)
+        tm.assert_series_equal(result, expected)
+
     def test_pivot_table(self):
 
         raw_cat1 = Categorical(["a","a","b","b"], categories=["a","b","z"], ordered=True)

From d194c99db9df23e8a47f1a317492adab38efe306 Mon Sep 17 00:00:00 2001
From: Evan Wright <ewright@knight.com>
Date: Wed, 29 Apr 2015 15:26:30 -0400
Subject: [PATCH 2/2] Fix missing index name in test

---
 pandas/tests/test_categorical.py | 1 +
 1 file changed, 1 insertion(+)
 mode change 100644 => 100755 pandas/tests/test_categorical.py

diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
old mode 100644
new mode 100755
index 5a5401c8da3ca..c03fd93f6173f
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -1846,6 +1846,7 @@ def f(x):
         c = pd.cut(df.a, [0, 1, 2, 3, 4])
         result = df.groupby(c).apply(len)
         expected = pd.Series([1, 0, 0, 0], index=c.values.categories)
+        expected.index.name = 'a'
         tm.assert_series_equal(result, expected)
 
     def test_pivot_table(self):