Skip to content

Commit d7d1e9f

Browse files
committed
add test/fix for dtype=object
1 parent 79de61c commit d7d1e9f

File tree

2 files changed

+17
-13
lines changed

2 files changed

+17
-13
lines changed

pandas/io/parsers.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1303,7 +1303,7 @@ def _apply_converter(self, values, conv_f, na_values, col_na_values,
13031303

13041304
cvals, na_count = self._infer_types(
13051305
values, set(col_na_values) | col_na_fvalues,
1306-
try_numeric=False)
1306+
try_num_bool=False)
13071307
return cvals, na_count
13081308

13091309
def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
@@ -1330,10 +1330,15 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
13301330
values, conv_f, na_values,
13311331
col_na_values, col_na_fvalues)
13321332
else:
1333+
try_num_bool = True
1334+
if cast_type and is_object_dtype(cast_type):
1335+
# skip inference if specified dtype is object
1336+
try_num_bool = False
1337+
13331338
# general type inference and conversion
13341339
cvals, na_count = self._infer_types(
13351340
values, set(col_na_values) | col_na_fvalues,
1336-
try_numeric=True)
1341+
try_num_bool)
13371342

13381343
if issubclass(cvals.dtype.type, np.integer) and self.compact_ints:
13391344
cvals = lib.downcast_int64(
@@ -1356,7 +1361,7 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
13561361
print('Filled %d NA values in column %s' % (na_count, str(c)))
13571362
return result
13581363

1359-
def _infer_types(self, values, na_values, try_numeric=True):
1364+
def _infer_types(self, values, na_values, try_num_bool=True):
13601365
na_count = 0
13611366
if issubclass(values.dtype.type, (np.number, np.bool_)):
13621367
mask = lib.ismember(values, na_values)
@@ -1367,7 +1372,7 @@ def _infer_types(self, values, na_values, try_numeric=True):
13671372
np.putmask(values, mask, np.nan)
13681373
return values, na_count
13691374

1370-
if try_numeric:
1375+
if try_num_bool:
13711376
try:
13721377
result = lib.maybe_convert_numeric(values, na_values, False)
13731378
na_count = isnull(result).sum()
@@ -1380,7 +1385,7 @@ def _infer_types(self, values, na_values, try_numeric=True):
13801385
if values.dtype == np.object_:
13811386
na_count = lib.sanitize_objects(values, na_values, False)
13821387

1383-
if result.dtype == np.object_ and try_numeric:
1388+
if result.dtype == np.object_ and try_num_bool:
13841389
result = lib.maybe_convert_bool(values,
13851390
true_values=self.true_values,
13861391
false_values=self.false_values)

pandas/io/tests/parser/dtypes.py

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -5,17 +5,12 @@
55
for all of the parsers defined in parsers.py
66
"""
77

8-
from datetime import datetime
9-
10-
import nose
11-
128
import numpy as np
139
import pandas as pd
1410
import pandas.util.testing as tm
1511

16-
from pandas.lib import Timestamp
1712
from pandas import DataFrame, Series, Index, MultiIndex, Categorical
18-
from pandas.compat import parse_date, StringIO, lmap
13+
from pandas.compat import StringIO
1914
from pandas.types.dtypes import CategoricalDtype
2015

2116

@@ -30,8 +25,12 @@ def test_passing_dtype(self):
3025

3126
# see gh-3795: passing 'str' as the dtype
3227
result = self.read_csv(path, dtype=str, index_col=0)
33-
tm.assert_series_equal(result.dtypes, Series(
34-
{'A': 'object', 'B': 'object'}))
28+
expected = df.astype(str)
29+
tm.assert_frame_equal(result, expected)
30+
31+
# for parsing, interpret object as str
32+
result = self.read_csv(path, dtype=object, index_col=0)
33+
tm.assert_frame_equal(result, expected)
3534

3635
# we expect all object columns, so need to
3736
# convert to test for equivalence

0 commit comments

Comments
 (0)