Skip to content

Commit 78d671f

Browse files
BranYang authored and jreback committed
BUG: error when reading one empty column from excel file
closes #12292 closes #9002 closes #12296
1 parent 8c41e62 commit 78d671f

File tree

3 files changed

+75
-15
lines changed

3 files changed

+75
-15
lines changed

doc/source/whatsnew/v0.18.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,6 +1102,7 @@ Bug Fixes
11021102
- Bug in ``Series.plot`` failing when index has a ``CustomBusinessDay`` frequency (:issue:`7222`).
11031103
- Bug in ``.to_sql`` for ``datetime.time`` values with sqlite fallback (:issue:`8341`)
11041104
- Bug in ``read_excel`` failing to read data with one column when ``squeeze=True`` (:issue:`12157`)
1105+
- Bug in ``read_excel`` failing to read one empty column (:issue:`12292`, :issue:`9002`)
11051106
- Bug in ``.groupby`` where a ``KeyError`` was not raised for a wrong column if there was only one row in the dataframe (:issue:`11741`)
11061107
- Bug in ``.read_csv`` with dtype specified on empty data producing an error (:issue:`12048`)
11071108
- Bug in ``.read_csv`` where strings like ``'2E'`` are treated as valid floats (:issue:`12237`)

pandas/io/excel.py

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -448,21 +448,26 @@ def _parse_cell(cell_contents, cell_typ):
448448
if com.is_list_like(header) and len(header) > 1:
449449
has_index_names = True
450450

451-
parser = TextParser(data, header=header, index_col=index_col,
452-
has_index_names=has_index_names,
453-
na_values=na_values,
454-
thousands=thousands,
455-
parse_dates=parse_dates,
456-
date_parser=date_parser,
457-
skiprows=skiprows,
458-
skip_footer=skip_footer,
459-
squeeze=squeeze,
460-
**kwds)
461-
462-
output[asheetname] = parser.read()
463-
if not squeeze or isinstance(output[asheetname], DataFrame):
464-
output[asheetname].columns = output[
465-
asheetname].columns.set_names(header_names)
451+
# GH 12292: error when reading one empty column from an Excel file
452+
try:
453+
parser = TextParser(data, header=header, index_col=index_col,
454+
has_index_names=has_index_names,
455+
na_values=na_values,
456+
thousands=thousands,
457+
parse_dates=parse_dates,
458+
date_parser=date_parser,
459+
skiprows=skiprows,
460+
skip_footer=skip_footer,
461+
squeeze=squeeze,
462+
**kwds)
463+
464+
output[asheetname] = parser.read()
465+
if not squeeze or isinstance(output[asheetname], DataFrame):
466+
output[asheetname].columns = output[
467+
asheetname].columns.set_names(header_names)
468+
except StopIteration:
469+
# No Data, return an empty DataFrame
470+
output[asheetname] = DataFrame()
466471

467472
if ret_dict:
468473
return output

pandas/io/tests/test_excel.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,60 @@ def test_read_excel_blank_with_header(self):
401401
actual = self.get_exceldf('blank_with_header', 'Sheet1')
402402
tm.assert_frame_equal(actual, expected)
403403

404+
# GH 12292: error when reading one empty column from an Excel file
405+
def test_read_one_empty_col_no_header(self):
406+
df = pd.DataFrame(
407+
[["", 1, 100],
408+
["", 2, 200],
409+
["", 3, 300],
410+
["", 4, 400]]
411+
)
412+
with ensure_clean(self.ext) as path:
413+
df.to_excel(path, 'no_header', index=False, header=False)
414+
actual_header_none = read_excel(
415+
path,
416+
'no_header',
417+
parse_cols=[0],
418+
header=None
419+
)
420+
421+
actual_header_zero = read_excel(
422+
path,
423+
'no_header',
424+
parse_cols=[0],
425+
header=0
426+
)
427+
expected = DataFrame()
428+
tm.assert_frame_equal(actual_header_none, expected)
429+
tm.assert_frame_equal(actual_header_zero, expected)
430+
431+
def test_read_one_empty_col_with_header(self):
432+
df = pd.DataFrame(
433+
[["", 1, 100],
434+
["", 2, 200],
435+
["", 3, 300],
436+
["", 4, 400]]
437+
)
438+
with ensure_clean(self.ext) as path:
439+
df.to_excel(path, 'with_header', index=False, header=True)
440+
actual_header_none = read_excel(
441+
path,
442+
'with_header',
443+
parse_cols=[0],
444+
header=None
445+
)
446+
447+
actual_header_zero = read_excel(
448+
path,
449+
'with_header',
450+
parse_cols=[0],
451+
header=0
452+
)
453+
expected_header_none = DataFrame(pd.Series([0], dtype='int64'))
454+
tm.assert_frame_equal(actual_header_none, expected_header_none)
455+
expected_header_zero = DataFrame(columns=[0], dtype='int64')
456+
tm.assert_frame_equal(actual_header_zero, expected_header_zero)
457+
404458

405459
class XlrdTests(ReadingTestsBase):
406460
"""

0 commit comments

Comments
 (0)