diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index ffba681565f48..2d2e673e15e03 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -694,7 +694,7 @@ Bug Fixes - Bug in getitem when the values of a ``Series`` were tz-aware (:issue:`12089`) - Bug in ``Series.str.get_dummies`` when one of the variables was 'name' (:issue:`12180`) - Bug in ``pd.concat`` while concatenating tz-aware NaT series. (:issue:`11693`, :issue:`11755`) - +- Bug in ``pd.read_stata`` when reading files with format version <= 108 (:issue:`12232`) - Bug in ``Timedelta.round`` with negative values (:issue:`11690`) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 8181e69abc60b..bdb48521bd791 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -851,23 +851,24 @@ def __init__(self, encoding): float32_max = b'\xff\xff\xff\x7e' float64_min = b'\xff\xff\xff\xff\xff\xff\xef\xff' float64_max = b'\xff\xff\xff\xff\xff\xff\xdf\x7f' - self.VALID_RANGE = \ - { - 'b': (-127, 100), - 'h': (-32767, 32740), - 'l': (-2147483647, 2147483620), - 'f': (np.float32(struct.unpack(' 108: data = self._do_convert_categoricals(data, self.value_label_dict, self.lbllist, diff --git a/pandas/io/tests/data/S4_EDUC1.DTA b/pandas/io/tests/data/S4_EDUC1.DTA new file mode 100644 index 0000000000000..2d5533b7e621c Binary files /dev/null and b/pandas/io/tests/data/S4_EDUC1.DTA differ diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py index e1e12e47457f9..0389cc4b113cf 100644 --- a/pandas/io/tests/test_stata.py +++ b/pandas/io/tests/test_stata.py @@ -409,9 +409,9 @@ def test_read_write_dta12(self): written_and_read_again.set_index('index'), formatted) def test_read_write_dta13(self): - s1 = Series(2**9, dtype=np.int16) - s2 = Series(2**17, dtype=np.int32) - s3 = Series(2**33, dtype=np.int64) + s1 = Series(2 ** 9, dtype=np.int16) + s2 = Series(2 ** 17, dtype=np.int32) + s3 = Series(2 ** 33, dtype=np.int64) original = DataFrame({'int16': s1, 'int32': 
s2, 'int64': s3}) original.index.name = 'index' @@ -568,6 +568,20 @@ def test_dates_invalid_column(self): tm.assert_frame_equal(written_and_read_again.set_index('index'), modified) + def test_105(self): + # Data obtained from: + # http://go.worldbank.org/ZXY29PVJ21 + dpath = os.path.join(self.dirpath, 'S4_EDUC1.DTA') + df = pd.read_stata(dpath) + df0 = [[1, 1, 3, -2], [2, 1, 2, -2], [4, 1, 1, -2]] + df0 = pd.DataFrame(df0) + df0.columns = ["clustnum", "pri_schl", "psch_num", "psch_dis"] + df0['clustnum'] = df0["clustnum"].astype(np.int16) + df0['pri_schl'] = df0["pri_schl"].astype(np.int8) + df0['psch_num'] = df0["psch_num"].astype(np.int8) + df0['psch_dis'] = df0["psch_dis"].astype(np.float32) + tm.assert_frame_equal(df.head(3), df0) + def test_date_export_formats(self): columns = ['tc', 'td', 'tw', 'tm', 'tq', 'th', 'ty'] conversions = dict(((c, c) for c in columns))