Skip to content

Commit f5b23a6

Browse files
committed
Add warning if both converter and dtype specified
1 parent f9ff10e commit f5b23a6

File tree

3 files changed

+41
-30
lines changed

3 files changed

+41
-30
lines changed

pandas/io/parsers.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1314,7 +1314,12 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
13141314

13151315
if conv_f is not None:
13161316
# conv_f applied to data before inference
1317-
# dtype isn't used if a converted specified
1317+
if cast_type is not None:
1318+
warnings.warn(("Both a converter and dtype were specified "
1319+
"for column {0} - only the converter will "
1320+
"be used").format(c), ParserWarning,
1321+
stacklevel=7)
1322+
13181323
try:
13191324
values = lib.map_infer(values, conv_f)
13201325
except ValueError:

pandas/io/tests/parser/dtypes.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pandas import DataFrame, Series, Index, MultiIndex, Categorical
1313
from pandas.compat import StringIO
1414
from pandas.types.dtypes import CategoricalDtype
15+
from pandas.io.common import ParserWarning
1516

1617

1718
class DtypeTests(object):
@@ -219,8 +220,9 @@ def test_dtype_with_converter(self):
219220
data = """a,b
220221
1.1,2.2
221222
1.2,2.3"""
222-
result = self.read_csv(StringIO(data), dtype={'a': 'i8'},
223-
converters={'a': lambda x: str(x)})
224223
# dtype spec ignored if a converter is specified
224+
with tm.assert_produces_warning(ParserWarning):
225+
result = self.read_csv(StringIO(data), dtype={'a': 'i8'},
226+
converters={'a': lambda x: str(x)})
225227
expected = DataFrame({'a': ['1.1', '1.2'], 'b': [2.2, 2.3]})
226228
tm.assert_frame_equal(result, expected)

pandas/parser.pyx

Lines changed: 31 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ from cpython cimport (PyObject, PyBytes_FromString,
1313
PyUnicode_Check, PyUnicode_AsUTF8String,
1414
PyErr_Occurred, PyErr_Fetch)
1515
from cpython.ref cimport PyObject, Py_XDECREF
16-
from io.common import ParserError, DtypeWarning, EmptyDataError
16+
from io.common import ParserError, DtypeWarning, EmptyDataError, ParserWarning
1717

1818
# Import CParserError as alias of ParserError for backwards compatibility.
1919
# Ultimately, we want to remove this import. See gh-12665 and gh-14479.
@@ -987,7 +987,7 @@ cdef class TextReader:
987987
Py_ssize_t i, nused
988988
kh_str_t *na_hashset = NULL
989989
int start, end
990-
object name, na_flist
990+
object name, na_flist, col_dtype = None
991991
bint na_filter = 0
992992
Py_ssize_t num_cols
993993

@@ -1043,14 +1043,33 @@ cdef class TextReader:
10431043
else:
10441044
na_filter = 0
10451045

1046+
col_dtype = None
1047+
if self.dtype is not None:
1048+
if isinstance(self.dtype, dict):
1049+
if name in self.dtype:
1050+
col_dtype = self.dtype[name]
1051+
elif i in self.dtype:
1052+
col_dtype = self.dtype[i]
1053+
else:
1054+
if self.dtype.names:
1055+
# structured array
1056+
col_dtype = np.dtype(self.dtype.descr[i][1])
1057+
else:
1058+
col_dtype = self.dtype
1059+
10461060
if conv:
1061+
if col_dtype is not None:
1062+
warnings.warn(("Both a converter and dtype were specified "
1063+
"for column {0} - only the converter will "
1064+
"be used").format(name), ParserWarning,
1065+
stacklevel=5)
10471066
results[i] = _apply_converter(conv, self.parser, i, start, end,
10481067
self.c_encoding)
10491068
continue
10501069

10511070
# Should return as the desired dtype (inferred or specified)
10521071
col_res, na_count = self._convert_tokens(
1053-
i, start, end, name, na_filter, na_hashset, na_flist)
1072+
i, start, end, name, na_filter, na_hashset, na_flist, col_dtype)
10541073

10551074
if na_filter:
10561075
self._free_na_set(na_hashset)
@@ -1075,32 +1094,17 @@ cdef class TextReader:
10751094
cdef inline _convert_tokens(self, Py_ssize_t i, int start, int end,
10761095
object name, bint na_filter,
10771096
kh_str_t *na_hashset,
1078-
object na_flist):
1079-
cdef:
1080-
object col_dtype = None
1081-
1082-
if self.dtype is not None:
1083-
if isinstance(self.dtype, dict):
1084-
if name in self.dtype:
1085-
col_dtype = self.dtype[name]
1086-
elif i in self.dtype:
1087-
col_dtype = self.dtype[i]
1088-
else:
1089-
if self.dtype.names:
1090-
# structured array
1091-
col_dtype = np.dtype(self.dtype.descr[i][1])
1092-
else:
1093-
col_dtype = self.dtype
1097+
object na_flist, object col_dtype):
10941098

1095-
if col_dtype is not None:
1096-
col_res, na_count = self._convert_with_dtype(
1097-
col_dtype, i, start, end, na_filter,
1098-
1, na_hashset, na_flist)
1099+
if col_dtype is not None:
1100+
col_res, na_count = self._convert_with_dtype(
1101+
col_dtype, i, start, end, na_filter,
1102+
1, na_hashset, na_flist)
10991103

1100-
# Fallback on the parse (e.g. we requested int dtype,
1101-
# but its actually a float).
1102-
if col_res is not None:
1103-
return col_res, na_count
1104+
# Fallback on the parse (e.g. we requested int dtype,
1105+
# but it's actually a float).
1106+
if col_res is not None:
1107+
return col_res, na_count
11041108

11051109
if i in self.noconvert:
11061110
return self._string_convert(i, start, end, na_filter, na_hashset)

0 commit comments

Comments
 (0)