Add warning if both a converter and a dtype are specified

chris-b1 · chris-b1 · commit f5b23a67b549 · 2016-11-23T08:34:33.000-06:00
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -1314,7 +1314,12 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
 
             if conv_f is not None:
                 # conv_f applied to data before inference
-                # dtype isn't used if a converted specified
+                if cast_type is not None:
+                    warnings.warn(("Both a converter and dtype were specified "
+                                   "for column {0} - only the converter will "
+                                   "be used").format(c), ParserWarning,
+                                  stacklevel=7)
+
                 try:
                     values = lib.map_infer(values, conv_f)
                 except ValueError:
diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/io/tests/parser/dtypes.py
@@ -12,6 +12,7 @@
 from pandas import DataFrame, Series, Index, MultiIndex, Categorical
 from pandas.compat import StringIO
 from pandas.types.dtypes import CategoricalDtype
+from pandas.io.common import ParserWarning
 
 
 class DtypeTests(object):
@@ -219,8 +220,9 @@ def test_dtype_with_converter(self):
         data = """a,b
 1.1,2.2
 1.2,2.3"""
-        result = self.read_csv(StringIO(data), dtype={'a': 'i8'},
-                               converters={'a': lambda x: str(x)})
         # dtype spec ignored if converted specified
+        with tm.assert_produces_warning(ParserWarning):
+            result = self.read_csv(StringIO(data), dtype={'a': 'i8'},
+                                converters={'a': lambda x: str(x)})
         expected = DataFrame({'a': ['1.1', '1.2'], 'b': [2.2, 2.3]})
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/parser.pyx b/pandas/parser.pyx
@@ -13,7 +13,7 @@ from cpython cimport (PyObject, PyBytes_FromString,
                       PyUnicode_Check, PyUnicode_AsUTF8String,
                       PyErr_Occurred, PyErr_Fetch)
 from cpython.ref cimport PyObject, Py_XDECREF
-from io.common import ParserError, DtypeWarning, EmptyDataError
+from io.common import ParserError, DtypeWarning, EmptyDataError, ParserWarning
 
 # Import CParserError as alias of ParserError for backwards compatibility.
 # Ultimately, we want to remove this import. See gh-12665 and gh-14479.
@@ -987,7 +987,7 @@ cdef class TextReader:
             Py_ssize_t i, nused
             kh_str_t *na_hashset = NULL
             int start, end
-            object name, na_flist
+            object name, na_flist, col_dtype = None
             bint na_filter = 0
             Py_ssize_t num_cols
 
@@ -1043,14 +1043,33 @@ cdef class TextReader:
             else:
                 na_filter = 0
 
+            col_dtype = None
+            if self.dtype is not None:
+                if isinstance(self.dtype, dict):
+                    if name in self.dtype:
+                        col_dtype = self.dtype[name]
+                    elif i in self.dtype:
+                        col_dtype = self.dtype[i]
+                else:
+                    if self.dtype.names:
+                        # structured array
+                        col_dtype = np.dtype(self.dtype.descr[i][1])
+                    else:
+                        col_dtype = self.dtype
+
             if conv:
+                if col_dtype is not None:
+                    warnings.warn(("Both a converter and dtype were specified "
+                                   "for column {0} - only the converter will "
+                                   "be used").format(name), ParserWarning,
+                                  stacklevel=5)
                 results[i] = _apply_converter(conv, self.parser, i, start, end,
                                               self.c_encoding)
                 continue
 
             # Should return as the desired dtype (inferred or specified)
             col_res, na_count = self._convert_tokens(
-                i, start, end, name, na_filter, na_hashset, na_flist)
+                i, start, end, name, na_filter, na_hashset, na_flist, col_dtype)
 
             if na_filter:
                 self._free_na_set(na_hashset)
@@ -1075,32 +1094,17 @@ cdef class TextReader:
     cdef inline _convert_tokens(self, Py_ssize_t i, int start, int end,
                                 object name, bint na_filter,
                                 kh_str_t *na_hashset,
-                                object na_flist):
-        cdef:
-            object col_dtype = None
-
-        if self.dtype is not None:
-            if isinstance(self.dtype, dict):
-                if name in self.dtype:
-                    col_dtype = self.dtype[name]
-                elif i in self.dtype:
-                    col_dtype = self.dtype[i]
-            else:
-                if self.dtype.names:
-                    # structured array
-                    col_dtype = np.dtype(self.dtype.descr[i][1])
-                else:
-                    col_dtype = self.dtype
+                                object na_flist, object col_dtype):
 
-            if col_dtype is not None:
-                col_res, na_count = self._convert_with_dtype(
-                    col_dtype, i, start, end, na_filter,
-                    1, na_hashset, na_flist)
+        if col_dtype is not None:
+            col_res, na_count = self._convert_with_dtype(
+                col_dtype, i, start, end, na_filter,
+                1, na_hashset, na_flist)
 
-                # Fallback on the parse (e.g. we requested int dtype,
-                # but its actually a float).
-                if col_res is not None:
-                    return col_res, na_count
+            # Fallback on the parse (e.g. we requested int dtype,
+            # but it's actually a float).
+            if col_res is not None:
+                return col_res, na_count
 
         if i in self.noconvert:
             return self._string_convert(i, start, end, na_filter, na_hashset)