|
9 | 9 | import numpy as np |
10 | 10 | from pandas import (DataFrame, Series, date_range, Timedelta, Timestamp, |
11 | 11 | Categorical, compat, concat, option_context) |
12 | | -from pandas.compat import u |
| 12 | +from pandas.compat import u, PY2 |
13 | 13 | from pandas import _np_version_under1p14 |
14 | 14 |
|
15 | 15 | from pandas.core.dtypes.dtypes import DatetimeTZDtype, CategoricalDtype |
|
21 | 21 | import pandas as pd |
22 | 22 |
|
23 | 23 |
|
@pytest.fixture(params=[str, compat.text_type] if PY2 else [str])
def text_dtype(request):
    """
    Text type used as the target of ``astype`` in the string-cast tests.

    Yields ``str`` and, on Python 2 only, ``compat.text_type`` as well.
    The second type is added conditionally because on Python 3 both names
    are expected to refer to the same type, and listing it twice would
    run every dependent test twice with an identical parameter.
    """
    return request.param
| 27 | + |
| 28 | + |
24 | 29 | class TestDataFrameDataTypes(TestData): |
25 | 30 |
|
26 | 31 | def test_concat_empty_dataframe_dtypes(self): |
@@ -351,27 +356,24 @@ def test_select_dtypes_datetime_with_tz(self): |
351 | 356 | expected = df3.reindex(columns=[]) |
352 | 357 | assert_frame_equal(result, expected) |
353 | 358 |
|
354 | | - def test_select_dtypes_str_raises(self): |
355 | | - df = DataFrame({'a': list('abc'), |
356 | | - 'g': list(u('abc')), |
357 | | - 'b': list(range(1, 4)), |
358 | | - 'c': np.arange(3, 6).astype('u1'), |
359 | | - 'd': np.arange(4.0, 7.0, dtype='float64'), |
360 | | - 'e': [True, False, True], |
361 | | - 'f': pd.date_range('now', periods=3).values}) |
362 | | - string_dtypes = set((str, 'str', np.string_, 'S1', |
363 | | - 'unicode', np.unicode_, 'U1')) |
364 | | - try: |
365 | | - string_dtypes.add(unicode) |
366 | | - except NameError: |
367 | | - pass |
368 | | - for dt in string_dtypes: |
369 | | - with tm.assert_raises_regex(TypeError, |
370 | | - 'string dtypes are not allowed'): |
371 | | - df.select_dtypes(include=[dt]) |
372 | | - with tm.assert_raises_regex(TypeError, |
373 | | - 'string dtypes are not allowed'): |
374 | | - df.select_dtypes(exclude=[dt]) |
| 359 | + @pytest.mark.parametrize( |
| 360 | + "dtype", {str, "str", np.string_, "S1", |
| 361 | + "unicode", np.unicode_, "U1"} |
| 362 | + .union({unicode} if PY2 else {})) |
| 363 | + @pytest.mark.parametrize("arg", ["include", "exclude"]) |
| 364 | + def test_select_dtypes_str_raises(self, dtype, arg): |
| 365 | + df = DataFrame({"a": list("abc"), |
| 366 | + "g": list(u("abc")), |
| 367 | + "b": list(range(1, 4)), |
| 368 | + "c": np.arange(3, 6).astype("u1"), |
| 369 | + "d": np.arange(4.0, 7.0, dtype="float64"), |
| 370 | + "e": [True, False, True], |
| 371 | + "f": pd.date_range("now", periods=3).values}) |
| 372 | + msg = "string dtypes are not allowed" |
| 373 | + kwargs = {arg: [dtype]} |
| 374 | + |
| 375 | + with tm.assert_raises_regex(TypeError, msg): |
| 376 | + df.select_dtypes(**kwargs) |
375 | 377 |
|
376 | 378 | def test_select_dtypes_bad_arg_raises(self): |
377 | 379 | df = DataFrame({'a': list('abc'), |
@@ -502,61 +504,59 @@ def test_astype_with_view(self): |
502 | 504 | tf = self.frame.astype(np.float64) |
503 | 505 | casted = tf.astype(np.int64, copy=False) # noqa |
504 | 506 |
|
505 | | - def test_astype_cast_nan_inf_int(self): |
506 | | - # GH14265, check nan and inf raise error when converting to int |
507 | | - types = [np.int32, np.int64] |
508 | | - values = [np.nan, np.inf] |
509 | | - msg = 'Cannot convert non-finite values \\(NA or inf\\) to integer' |
| 507 | + @pytest.mark.parametrize("dtype", [np.int32, np.int64]) |
| 508 | + @pytest.mark.parametrize("val", [np.nan, np.inf]) |
| 509 | + def test_astype_cast_nan_inf_int(self, val, dtype): |
| 510 | + # see gh-14265 |
| 511 | + # |
| 512 | + # Check NaN and inf --> raise error when converting to int. |
| 513 | + msg = "Cannot convert non-finite values \\(NA or inf\\) to integer" |
| 514 | + df = DataFrame([val]) |
510 | 515 |
|
511 | | - for this_type in types: |
512 | | - for this_val in values: |
513 | | - df = DataFrame([this_val]) |
514 | | - with tm.assert_raises_regex(ValueError, msg): |
515 | | - df.astype(this_type) |
| 516 | + with tm.assert_raises_regex(ValueError, msg): |
| 517 | + df.astype(dtype) |
516 | 518 |
|
517 | | - def test_astype_str(self): |
518 | | - # GH9757 |
519 | | - a = Series(date_range('2010-01-04', periods=5)) |
520 | | - b = Series(date_range('3/6/2012 00:00', periods=5, tz='US/Eastern')) |
521 | | - c = Series([Timedelta(x, unit='d') for x in range(5)]) |
| 519 | + def test_astype_str(self, text_dtype): |
| 520 | + # see gh-9757 |
| 521 | + a = Series(date_range("2010-01-04", periods=5)) |
| 522 | + b = Series(date_range("3/6/2012 00:00", periods=5, tz="US/Eastern")) |
| 523 | + c = Series([Timedelta(x, unit="d") for x in range(5)]) |
522 | 524 | d = Series(range(5)) |
523 | 525 | e = Series([0.0, 0.2, 0.4, 0.6, 0.8]) |
524 | 526 |
|
525 | | - df = DataFrame({'a': a, 'b': b, 'c': c, 'd': d, 'e': e}) |
526 | | - |
527 | | - # datetimelike |
528 | | - # Test str and unicode on python 2.x and just str on python 3.x |
529 | | - for tt in set([str, compat.text_type]): |
530 | | - result = df.astype(tt) |
531 | | - |
532 | | - expected = DataFrame({ |
533 | | - 'a': list(map(tt, map(lambda x: Timestamp(x)._date_repr, |
534 | | - a._values))), |
535 | | - 'b': list(map(tt, map(Timestamp, b._values))), |
536 | | - 'c': list(map(tt, map(lambda x: Timedelta(x) |
537 | | - ._repr_base(format='all'), c._values))), |
538 | | - 'd': list(map(tt, d._values)), |
539 | | - 'e': list(map(tt, e._values)), |
540 | | - }) |
541 | | - |
542 | | - assert_frame_equal(result, expected) |
543 | | - |
544 | | - # float/nan |
545 | | - # 11302 |
546 | | - # consistency in astype(str) |
547 | | - for tt in set([str, compat.text_type]): |
548 | | - result = DataFrame([np.NaN]).astype(tt) |
549 | | - expected = DataFrame(['nan']) |
550 | | - assert_frame_equal(result, expected) |
551 | | - |
552 | | - result = DataFrame([1.12345678901234567890]).astype(tt) |
553 | | - if _np_version_under1p14: |
554 | | - # < 1.14 truncates |
555 | | - expected = DataFrame(['1.12345678901']) |
556 | | - else: |
557 | | - # >= 1.14 preserves the full repr |
558 | | - expected = DataFrame(['1.1234567890123457']) |
559 | | - assert_frame_equal(result, expected) |
| 527 | + df = DataFrame({"a": a, "b": b, "c": c, "d": d, "e": e}) |
| 528 | + |
| 529 | + # Datetime-like |
| 530 | + # Test str and unicode on Python 2.x and just str on Python 3.x |
| 531 | + result = df.astype(text_dtype) |
| 532 | + |
| 533 | + expected = DataFrame({ |
| 534 | + "a": list(map(text_dtype, |
| 535 | + map(lambda x: Timestamp(x)._date_repr, a._values))), |
| 536 | + "b": list(map(text_dtype, map(Timestamp, b._values))), |
| 537 | + "c": list(map(text_dtype, |
| 538 | + map(lambda x: Timedelta(x)._repr_base(format="all"), |
| 539 | + c._values))), |
| 540 | + "d": list(map(text_dtype, d._values)), |
| 541 | + "e": list(map(text_dtype, e._values)), |
| 542 | + }) |
| 543 | + |
| 544 | + assert_frame_equal(result, expected) |
| 545 | + |
| 546 | + def test_astype_str_float(self, text_dtype): |
| 547 | + # see gh-11302 |
| 548 | + result = DataFrame([np.NaN]).astype(text_dtype) |
| 549 | + expected = DataFrame(["nan"]) |
| 550 | + |
| 551 | + assert_frame_equal(result, expected) |
| 552 | + result = DataFrame([1.12345678901234567890]).astype(text_dtype) |
| 553 | + |
| 554 | + # < 1.14 truncates |
| 555 | + # >= 1.14 preserves the full repr |
| 556 | + val = ("1.12345678901" if _np_version_under1p14 |
| 557 | + else "1.1234567890123457") |
| 558 | + expected = DataFrame([val]) |
| 559 | + assert_frame_equal(result, expected) |
560 | 560 |
|
561 | 561 | @pytest.mark.parametrize("dtype_class", [dict, Series]) |
562 | 562 | def test_astype_dict_like(self, dtype_class): |
|
0 commit comments