Add test and fix for read_csv with dtype=object (skip numeric/bool inference)

chris-b1 · chris-b1 · commit d7d1e9fff02f · 2016-09-25T09:10:13.000-05:00
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -1303,7 +1303,7 @@ def _apply_converter(self, values, conv_f, na_values, col_na_values,
 
         cvals, na_count = self._infer_types(
             values, set(col_na_values) | col_na_fvalues,
-            try_numeric=False)
+            try_num_bool=False)
         return cvals, na_count
 
     def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
@@ -1330,10 +1330,15 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
                     values, conv_f, na_values,
                     col_na_values, col_na_fvalues)
             else:
+                try_num_bool = True
+                if cast_type and is_object_dtype(cast_type):
+                    # skip numeric/bool inference if specified dtype is object
+                    try_num_bool = False
+
                 # general type inference and conversion
                 cvals, na_count = self._infer_types(
                     values, set(col_na_values) | col_na_fvalues,
-                    try_numeric=True)
+                    try_num_bool)
 
             if issubclass(cvals.dtype.type, np.integer) and self.compact_ints:
                 cvals = lib.downcast_int64(
@@ -1356,7 +1361,7 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
                 print('Filled %d NA values in column %s' % (na_count, str(c)))
         return result
 
-    def _infer_types(self, values, na_values, try_numeric=True):
+    def _infer_types(self, values, na_values, try_num_bool=True):
         na_count = 0
         if issubclass(values.dtype.type, (np.number, np.bool_)):
             mask = lib.ismember(values, na_values)
@@ -1367,7 +1372,7 @@ def _infer_types(self, values, na_values, try_numeric=True):
                 np.putmask(values, mask, np.nan)
             return values, na_count
 
-        if try_numeric:
+        if try_num_bool:
             try:
                 result = lib.maybe_convert_numeric(values, na_values, False)
                 na_count = isnull(result).sum()
@@ -1380,7 +1385,7 @@ def _infer_types(self, values, na_values, try_numeric=True):
             if values.dtype == np.object_:
                 na_count = lib.sanitize_objects(values, na_values, False)
 
-        if result.dtype == np.object_ and try_numeric:
+        if result.dtype == np.object_ and try_num_bool:
             result = lib.maybe_convert_bool(values,
                                             true_values=self.true_values,
                                             false_values=self.false_values)
diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/io/tests/parser/dtypes.py
@@ -5,17 +5,12 @@
 for all of the parsers defined in parsers.py
 """
 
-from datetime import datetime
-
-import nose
-
 import numpy as np
 import pandas as pd
 import pandas.util.testing as tm
 
-from pandas.lib import Timestamp
 from pandas import DataFrame, Series, Index, MultiIndex, Categorical
-from pandas.compat import parse_date, StringIO, lmap
+from pandas.compat import StringIO
 from pandas.types.dtypes import CategoricalDtype
 
 
@@ -30,8 +25,12 @@ def test_passing_dtype(self):
 
             # see gh-3795: passing 'str' as the dtype
             result = self.read_csv(path, dtype=str, index_col=0)
-            tm.assert_series_equal(result.dtypes, Series(
-                {'A': 'object', 'B': 'object'}))
+            expected = df.astype(str)
+            tm.assert_frame_equal(result, expected)
+
+            # for parsing, interpret object as str
+            result = self.read_csv(path, dtype=object, index_col=0)
+            tm.assert_frame_equal(result, expected)
 
             # we expect all object columns, so need to
             # convert to test for equivalence