BUG: Bug in .read_csv with dtype specified on empty data producing an error

Sereger13 · jreback · commit ab3291d7ddbe · 2016-01-24T17:39:05.000-05:00
closes #12048
diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
@@ -531,12 +531,12 @@ of columns didn't match the number of series provided (:issue:`12039`).
 
 
 - Bug in ``.groupby`` where a ``KeyError`` was not raised for a wrong column if there was only one row in the dataframe (:issue:`11741`)
+- Bug in ``.read_csv`` with dtype specified on empty data producing an error (:issue:`12048`)
+- Bug in building *pandas* with debugging symbols (:issue:`12123`)
 
 
 - Removed ``millisecond`` property of ``DatetimeIndex``. This would always raise a ``ValueError`` (:issue:`12019`).
 - Bug in ``Series`` constructor with read-only data (:issue:`11502`)
 
 - Bug in ``.loc`` setitem indexer preventing the use of a TZ-aware DatetimeIndex (:issue:`12050`)
 - Bug in ``.style`` indexes and multi-indexes not appearing (:issue:`11655`)
-
-- Bug in building Pandas with debugging symbols (:issue:`12123`)
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -4,6 +4,7 @@
 from __future__ import print_function
 from pandas.compat import range, lrange, StringIO, lzip, zip, string_types, map
 from pandas import compat
+from collections import defaultdict
 import re
 import csv
 import warnings
@@ -2264,6 +2265,8 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None):
     if dtype is None:
         dtype = {}
     else:
+        if not isinstance(dtype, dict):
+            dtype = defaultdict(lambda: dtype)
         # Convert column indexes to column names.
         dtype = dict((columns[k] if com.is_integer(k) else k, v)
                      for k, v in compat.iteritems(dtype))
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
@@ -695,6 +695,14 @@ def test_passing_dtype(self):
                                   dtype={'A': 'timedelta64', 'B': 'float64'},
                                   index_col=0)
 
+        with tm.assertRaisesRegexp(ValueError,
+                                   "The 'dtype' option is not supported"):
+
+            # empty frame
+            # GH12048
+            self.read_csv(StringIO('A,B'), dtype=str)
+
+
         def test_quoting(self):
             bad_line_small = """printer\tresult\tvariant_name
 Klosterdruckerei\tKlosterdruckerei <Salem> (1611-1804)\tMuller, Jacob
@@ -3588,6 +3596,12 @@ def test_passing_dtype(self):
             self.assertRaises(TypeError, self.read_csv, path, dtype={'A': 'timedelta64', 'B': 'float64'},
                               index_col=0)
 
+        # empty frame
+        # GH12048
+        actual = self.read_csv(StringIO('A,B'), dtype=str)
+        expected = DataFrame({'A': [], 'B': []}, index=[], dtype=str)
+        tm.assert_frame_equal(actual, expected)
+
     def test_dtype_and_names_error(self):
 
         # GH 8833