BUG: error when reading one empty column from excel file

BranYang · jreback · commit 78d671fbcd18 · 2016-02-27T10:08:29.000-05:00
closes #12292 closes #9002 closes #12296
diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
@@ -1102,6 +1102,7 @@ Bug Fixes
 - Bug in ``Series.plot`` failing when index has a ``CustomBusinessDay`` frequency (:issue:`7222`).
 - Bug in ``.to_sql`` for ``datetime.time`` values with sqlite fallback (:issue:`8341`)
 - Bug in ``read_excel`` failing to read data with one column when ``squeeze=True`` (:issue:`12157`)
+- Bug in ``read_excel`` failing to read one empty column (:issue:`12292`, :issue:`9002`)
 - Bug in ``.groupby`` where a ``KeyError`` was not raised for a wrong column if there was only one row in the dataframe (:issue:`11741`)
 - Bug in ``.read_csv`` with dtype specified on empty data producing an error (:issue:`12048`)
 - Bug in ``.read_csv`` where strings like ``'2E'`` are treated as valid floats (:issue:`12237`)
diff --git a/pandas/io/excel.py b/pandas/io/excel.py
@@ -448,21 +448,26 @@ def _parse_cell(cell_contents, cell_typ):
             if com.is_list_like(header) and len(header) > 1:
                 has_index_names = True
 
-            parser = TextParser(data, header=header, index_col=index_col,
-                                has_index_names=has_index_names,
-                                na_values=na_values,
-                                thousands=thousands,
-                                parse_dates=parse_dates,
-                                date_parser=date_parser,
-                                skiprows=skiprows,
-                                skip_footer=skip_footer,
-                                squeeze=squeeze,
-                                **kwds)
-
-            output[asheetname] = parser.read()
-            if not squeeze or isinstance(output[asheetname], DataFrame):
-                output[asheetname].columns = output[
-                    asheetname].columns.set_names(header_names)
+            # GH 12292: error when reading one empty column from an Excel file
+            try:
+                parser = TextParser(data, header=header, index_col=index_col,
+                                    has_index_names=has_index_names,
+                                    na_values=na_values,
+                                    thousands=thousands,
+                                    parse_dates=parse_dates,
+                                    date_parser=date_parser,
+                                    skiprows=skiprows,
+                                    skip_footer=skip_footer,
+                                    squeeze=squeeze,
+                                    **kwds)
+
+                output[asheetname] = parser.read()
+                if not squeeze or isinstance(output[asheetname], DataFrame):
+                    output[asheetname].columns = output[
+                        asheetname].columns.set_names(header_names)
+            except StopIteration:
+                # No Data, return an empty DataFrame
+                output[asheetname] = DataFrame()
 
         if ret_dict:
             return output
diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py
@@ -401,6 +401,60 @@ def test_read_excel_blank_with_header(self):
         actual = self.get_exceldf('blank_with_header', 'Sheet1')
         tm.assert_frame_equal(actual, expected)
 
+    # GH 12292: error when reading one empty column from an Excel file
+    def test_read_one_empty_col_no_header(self):
+        df = pd.DataFrame(
+            [["", 1, 100],
+             ["", 2, 200],
+             ["", 3, 300],
+             ["", 4, 400]]
+        )
+        with ensure_clean(self.ext) as path:
+            df.to_excel(path, 'no_header', index=False, header=False)
+            actual_header_none = read_excel(
+                path,
+                'no_header',
+                parse_cols=[0],
+                header=None
+            )
+
+            actual_header_zero = read_excel(
+                path,
+                'no_header',
+                parse_cols=[0],
+                header=0
+            )
+        expected = DataFrame()
+        tm.assert_frame_equal(actual_header_none, expected)
+        tm.assert_frame_equal(actual_header_zero, expected)
+
+    def test_read_one_empty_col_with_header(self):
+        df = pd.DataFrame(
+            [["", 1, 100],
+             ["", 2, 200],
+             ["", 3, 300],
+             ["", 4, 400]]
+        )
+        with ensure_clean(self.ext) as path:
+            df.to_excel(path, 'with_header', index=False, header=True)
+            actual_header_none = read_excel(
+                path,
+                'with_header',
+                parse_cols=[0],
+                header=None
+            )
+
+            actual_header_zero = read_excel(
+                path,
+                'with_header',
+                parse_cols=[0],
+                header=0
+            )
+        expected_header_none = DataFrame(pd.Series([0], dtype='int64'))
+        tm.assert_frame_equal(actual_header_none, expected_header_none)
+        expected_header_zero = DataFrame(columns=[0], dtype='int64')
+        tm.assert_frame_equal(actual_header_zero, expected_header_zero)
+
 
 class XlrdTests(ReadingTestsBase):
     """